mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
merge
This commit is contained in:
commit
79cb875249
@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(cons
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, int stride) {
|
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
|
||||||
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
|
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, int stride) {
|
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
|
||||||
return make_double2(from[0*stride], from[1*stride]);
|
return make_double2(from[0*stride], from[1*stride]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, int stride) {
|
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
|
||||||
to[stride*0] = from.x;
|
to[stride*0] = from.x;
|
||||||
to[stride*1] = from.y;
|
to[stride*1] = from.y;
|
||||||
to[stride*2] = from.z;
|
to[stride*2] = from.z;
|
||||||
to[stride*3] = from.w;
|
to[stride*3] = from.w;
|
||||||
}
|
}
|
||||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, int stride) {
|
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
|
||||||
to[stride*0] = from.x;
|
to[stride*0] = from.x;
|
||||||
to[stride*1] = from.y;
|
to[stride*1] = from.y;
|
||||||
}
|
}
|
||||||
@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a)
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
|
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
|
||||||
return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w));
|
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
|
||||||
}
|
}
|
||||||
template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
|
template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
|
||||||
return make_double2(abs(a.x), abs(a.y));
|
return make_double2(fabs(a.x), fabs(a.y));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline void
|
EIGEN_DEVICE_FUNC inline void
|
||||||
ptranspose(PacketBlock<float4,4>& kernel) {
|
ptranspose(PacketBlock<float4,4>& kernel) {
|
||||||
double tmp = kernel.packet[0].y;
|
double tmp = kernel.packet[0].y;
|
||||||
kernel.packet[0].y = kernel.packet[1].x;
|
kernel.packet[0].y = kernel.packet[1].x;
|
||||||
@ -279,7 +279,7 @@ ptranspose(PacketBlock<float4,4>& kernel) {
|
|||||||
kernel.packet[3].z = tmp;
|
kernel.packet[3].z = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline void
|
EIGEN_DEVICE_FUNC inline void
|
||||||
ptranspose(PacketBlock<double2,2>& kernel) {
|
ptranspose(PacketBlock<double2,2>& kernel) {
|
||||||
double tmp = kernel.packet[0].y;
|
double tmp = kernel.packet[0].y;
|
||||||
kernel.packet[0].y = kernel.packet[1].x;
|
kernel.packet[0].y = kernel.packet[1].x;
|
||||||
|
@ -80,8 +80,8 @@
|
|||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h"
|
||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h"
|
||||||
|
|
||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h"
|
|
||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h"
|
||||||
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h"
|
||||||
|
|
||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h"
|
||||||
#include "unsupported/Eigen/CXX11/src/Tensor/Tensor.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/Tensor.h"
|
||||||
|
@ -32,8 +32,7 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice {
|
|||||||
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
|
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
|
||||||
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
|
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
|
||||||
Assign assign(m_expression, other);
|
Assign assign(m_expression, other);
|
||||||
static const bool Vectorize = TensorEvaluator<const Assign, DeviceType>::PacketAccess;
|
internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
|
||||||
internal::TensorExecutor<const Assign, DeviceType, Vectorize>::run(assign, m_device);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -44,8 +43,7 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice {
|
|||||||
Sum sum(m_expression, other);
|
Sum sum(m_expression, other);
|
||||||
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
|
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
|
||||||
Assign assign(m_expression, sum);
|
Assign assign(m_expression, sum);
|
||||||
static const bool Vectorize = TensorEvaluator<const Assign, DeviceType>::PacketAccess;
|
internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
|
||||||
internal::TensorExecutor<const Assign, DeviceType, Vectorize>::run(assign, m_device);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -56,8 +54,7 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice {
|
|||||||
Difference difference(m_expression, other);
|
Difference difference(m_expression, other);
|
||||||
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
|
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
|
||||||
Assign assign(m_expression, difference);
|
Assign assign(m_expression, difference);
|
||||||
static const bool Vectorize = TensorEvaluator<const Assign, DeviceType>::PacketAccess;
|
internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
|
||||||
internal::TensorExecutor<const Assign, DeviceType, Vectorize>::run(assign, m_device);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -76,8 +73,7 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPool
|
|||||||
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
|
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
|
||||||
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
|
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
|
||||||
Assign assign(m_expression, other);
|
Assign assign(m_expression, other);
|
||||||
static const bool Vectorize = TensorEvaluator<const Assign, ThreadPoolDevice>::PacketAccess;
|
internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
|
||||||
internal::TensorExecutor<const Assign, ThreadPoolDevice, Vectorize>::run(assign, m_device);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,8 +84,7 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPool
|
|||||||
Sum sum(m_expression, other);
|
Sum sum(m_expression, other);
|
||||||
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
|
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
|
||||||
Assign assign(m_expression, sum);
|
Assign assign(m_expression, sum);
|
||||||
static const bool Vectorize = TensorEvaluator<const Assign, ThreadPoolDevice>::PacketAccess;
|
internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
|
||||||
internal::TensorExecutor<const Assign, ThreadPoolDevice, Vectorize>::run(assign, m_device);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,8 +95,7 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPool
|
|||||||
Difference difference(m_expression, other);
|
Difference difference(m_expression, other);
|
||||||
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
|
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
|
||||||
Assign assign(m_expression, difference);
|
Assign assign(m_expression, difference);
|
||||||
static const bool Vectorize = TensorEvaluator<const Assign, ThreadPoolDevice>::PacketAccess;
|
internal::TensorExecutor<const Assign, ThreadPoolDevice>::run(assign, m_device);
|
||||||
internal::TensorExecutor<const Assign, ThreadPoolDevice, Vectorize>::run(assign, m_device);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -122,7 +116,7 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
|
|||||||
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
|
EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
|
||||||
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
|
typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
|
||||||
Assign assign(m_expression, other);
|
Assign assign(m_expression, other);
|
||||||
internal::TensorExecutor<const Assign, GpuDevice, false>::run(assign, m_device);
|
internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -133,7 +127,7 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
|
|||||||
Sum sum(m_expression, other);
|
Sum sum(m_expression, other);
|
||||||
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
|
typedef TensorAssignOp<ExpressionType, const Sum> Assign;
|
||||||
Assign assign(m_expression, sum);
|
Assign assign(m_expression, sum);
|
||||||
internal::TensorExecutor<const Assign, GpuDevice, false>::run(assign, m_device);
|
internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,14 +138,13 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
|
|||||||
Difference difference(m_expression, other);
|
Difference difference(m_expression, other);
|
||||||
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
|
typedef TensorAssignOp<ExpressionType, const Difference> Assign;
|
||||||
Assign assign(m_expression, difference);
|
Assign assign(m_expression, difference);
|
||||||
static const bool Vectorize = TensorEvaluator<const Assign, GpuDevice>::PacketAccess;
|
internal::TensorExecutor<const Assign, GpuDevice>::run(assign, m_device);
|
||||||
internal::TensorExecutor<const Assign, GpuDevice, Vectorize>::run(assign, m_device);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const GpuDevice& m_device;
|
const GpuDevice& m_device;
|
||||||
ExpressionType m_expression;
|
ExpressionType& m_expression;
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -352,11 +352,12 @@ template<typename IfArgType, typename ThenArgType, typename ElseArgType, typenam
|
|||||||
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
|
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
|
||||||
{
|
{
|
||||||
typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
|
typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
|
||||||
|
typedef typename XprType::Scalar Scalar;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess/* &
|
PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess &
|
||||||
TensorEvaluator<IfArgType>::PacketAccess*/,
|
internal::packet_traits<Scalar>::HasBlend,
|
||||||
Layout = TensorEvaluator<IfArgType, Device>::Layout,
|
Layout = TensorEvaluator<IfArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
};
|
};
|
||||||
@ -373,7 +374,6 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
|
|||||||
}
|
}
|
||||||
|
|
||||||
typedef typename XprType::Index Index;
|
typedef typename XprType::Index Index;
|
||||||
typedef typename XprType::Scalar Scalar;
|
|
||||||
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
|
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
|
||||||
typedef typename internal::traits<XprType>::Packet PacketReturnType;
|
typedef typename internal::traits<XprType>::Packet PacketReturnType;
|
||||||
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
|
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
|
||||||
@ -403,7 +403,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
|
|||||||
template<int LoadMode>
|
template<int LoadMode>
|
||||||
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
|
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
|
||||||
{
|
{
|
||||||
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||||
internal::Selector<PacketSize> select;
|
internal::Selector<PacketSize> select;
|
||||||
for (Index i = 0; i < PacketSize; ++i) {
|
for (Index i = 0; i < PacketSize; ++i) {
|
||||||
select.select[i] = m_condImpl.coeff(index+i);
|
select.select[i] = m_condImpl.coeff(index+i);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user