diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 19749c832..ceed1d1ef 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -197,21 +197,21 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro(cons } #endif -template<> EIGEN_DEVICE_FUNC inline float4 pgather(const float* from, int stride) { +template<> EIGEN_DEVICE_FUNC inline float4 pgather(const float* from, Index stride) { return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]); } -template<> EIGEN_DEVICE_FUNC inline double2 pgather(const double* from, int stride) { +template<> EIGEN_DEVICE_FUNC inline double2 pgather(const double* from, Index stride) { return make_double2(from[0*stride], from[1*stride]); } -template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const float4& from, int stride) { +template<> EIGEN_DEVICE_FUNC inline void pscatter(float* to, const float4& from, Index stride) { to[stride*0] = from.x; to[stride*1] = from.y; to[stride*2] = from.z; to[stride*3] = from.w; } -template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const double2& from, int stride) { +template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, const double2& from, Index stride) { to[stride*0] = from.x; to[stride*1] = from.y; } @@ -245,14 +245,14 @@ template<> EIGEN_DEVICE_FUNC inline double predux_min(const double2& a) } template<> EIGEN_DEVICE_FUNC inline float4 pabs(const float4& a) { - return make_float4(fabs(a.x), fabs(a.y), fabs(a.z), fabs(a.w)); + return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); } template<> EIGEN_DEVICE_FUNC inline double2 pabs(const double2& a) { - return make_double2(abs(a.x), abs(a.y)); + return make_double2(fabs(a.x), fabs(a.y)); } -template<> EIGEN_DEVICE_FUNC inline void +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { double tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; @@ -279,7 +279,7 @@ ptranspose(PacketBlock& kernel) { kernel.packet[3].z = tmp; } -template<> EIGEN_DEVICE_FUNC inline void +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { double tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 34107ae71..200bcf966 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -80,8 +80,8 @@ #include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h" -#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h" #include "unsupported/Eigen/CXX11/src/Tensor/Tensor.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index 7a67c56b3..17f10c07b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -32,8 +32,7 @@ template class TensorDevice { EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { typedef TensorAssignOp Assign; Assign assign(m_expression, other); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -44,8 +43,7 @@ template class TensorDevice { Sum sum(m_expression, other); typedef TensorAssignOp Assign; Assign assign(m_expression, sum); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -56,8 +54,7 @@ template class TensorDevice { Difference difference(m_expression, other); typedef TensorAssignOp Assign; Assign assign(m_expression, difference); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -76,8 +73,7 @@ template class TensorDevice Assign; Assign assign(m_expression, other); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -88,8 +84,7 @@ template class TensorDevice Assign; Assign assign(m_expression, sum); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -100,8 +95,7 @@ template class TensorDevice Assign; Assign assign(m_expression, difference); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -122,7 +116,7 @@ template class TensorDevice EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { typedef TensorAssignOp Assign; Assign assign(m_expression, other); - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -133,7 +127,7 @@ template class TensorDevice Sum sum(m_expression, other); typedef TensorAssignOp Assign; Assign assign(m_expression, sum); - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } @@ -144,14 +138,13 @@ template class TensorDevice Difference difference(m_expression, other); typedef TensorAssignOp Assign; Assign assign(m_expression, difference); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, m_device); + internal::TensorExecutor::run(assign, m_device); return *this; } protected: const GpuDevice& m_device; - ExpressionType m_expression; + ExpressionType& m_expression; }; #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index d084880de..9198c17ef 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -352,11 +352,12 @@ template, Device> { typedef TensorSelectOp XprType; + typedef typename XprType::Scalar Scalar; enum { IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess/* & - TensorEvaluator::PacketAccess*/, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::packet_traits::HasBlend, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented }; @@ -373,7 +374,6 @@ struct TensorEvaluator } typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename internal::traits::Packet PacketReturnType; typedef typename TensorEvaluator::Dimensions Dimensions; @@ -403,7 +403,7 @@ struct TensorEvaluator template EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - static const int PacketSize = internal::unpacket_traits::size; + const int PacketSize = internal::unpacket_traits::size; internal::Selector select; for (Index i = 0; i < PacketSize; ++i) { select.select[i] = m_condImpl.coeff(index+i);