From 3b614a235823322fd2ad9e367e36384bd353f9f8 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 7 Mar 2016 17:53:28 -0800 Subject: [PATCH 1/8] Use NumTraits::highest() and NumTraits::lowest() instead of the std::numeric_limits to make the tensor min and max functors more CUDA friendly. --- unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 528909688..b24f06df8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -149,11 +149,11 @@ template struct MaxReducer } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return -(std::numeric_limits::max)(); + return Eigen::NumTraits::lowest(); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1(-(std::numeric_limits::max)()); + return pset1(initialize()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { return accum; @@ -182,11 +182,11 @@ template struct MinReducer } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return (std::numeric_limits::max)(); + return Eigen::NumTraits::highest(); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1((std::numeric_limits::max)()); + return pset1(initialize()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { return accum; @@ -722,6 +722,7 @@ template <> class NormalRandomGenerator > { template class NormalRandomGenerator { public: + static const bool PacketAccess = false; NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {} private: From e09eb835dbf15b7bd0de9dc8786080a2eb377fdb Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 8 Mar 2016 12:07:33 -0800 Subject: [PATCH 2/8] Decoupled the packet type definition from the 
definition of the tensor ops. All the vectorization is now defined in the tensor evaluators. This will make it possible to reliably support devices with different packet types in the same compilation unit. --- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 3 --- .../Eigen/CXX11/src/Tensor/TensorAssign.h | 5 +--- .../CXX11/src/Tensor/TensorBroadcasting.h | 5 +--- .../Eigen/CXX11/src/Tensor/TensorChipping.h | 5 +--- .../CXX11/src/Tensor/TensorConcatenation.h | 6 +---- .../CXX11/src/Tensor/TensorContraction.h | 12 +++------ .../CXX11/src/Tensor/TensorContractionCuda.h | 3 +-- .../src/Tensor/TensorContractionMapper.h | 4 +-- .../src/Tensor/TensorContractionThreadPool.h | 3 +-- .../Eigen/CXX11/src/Tensor/TensorConversion.h | 7 ++--- .../CXX11/src/Tensor/TensorConvolution.h | 8 ++---- .../Eigen/CXX11/src/Tensor/TensorCustomOp.h | 12 ++------- .../Eigen/CXX11/src/Tensor/TensorEvalTo.h | 8 ++---- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 26 +++++++++---------- .../Eigen/CXX11/src/Tensor/TensorExpr.h | 13 ---------- .../Eigen/CXX11/src/Tensor/TensorFixedSize.h | 2 -- .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 8 ++---- .../Eigen/CXX11/src/Tensor/TensorGenerator.h | 5 +--- .../Eigen/CXX11/src/Tensor/TensorImagePatch.h | 5 +--- .../Eigen/CXX11/src/Tensor/TensorInflation.h | 6 ++--- .../Eigen/CXX11/src/Tensor/TensorLayoutSwap.h | 5 +--- .../Eigen/CXX11/src/Tensor/TensorMap.h | 2 -- .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 14 +++------- .../Eigen/CXX11/src/Tensor/TensorPadding.h | 5 +--- .../Eigen/CXX11/src/Tensor/TensorPatch.h | 5 +--- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 5 +--- .../Eigen/CXX11/src/Tensor/TensorRef.h | 7 ++--- .../Eigen/CXX11/src/Tensor/TensorReverse.h | 7 ++--- .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 7 ++--- .../Eigen/CXX11/src/Tensor/TensorStriding.h | 8 +++--- .../CXX11/src/Tensor/TensorVolumePatch.h | 5 +--- 31 files changed, 55 insertions(+), 161 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h 
b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 17e485f0a..759dede3f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -69,14 +69,11 @@ class Tensor : public TensorBase::StorageKind StorageKind; typedef typename internal::traits::Index Index; typedef Scalar_ Scalar; - typedef typename internal::packet_traits::type Packet; typedef typename NumTraits::Real RealScalar; typedef typename Base::CoeffReturnType CoeffReturnType; - typedef typename Base::PacketReturnType PacketReturnType; enum { IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign), - PacketAccess = (internal::packet_traits::size > 1), Layout = Options_ & RowMajor ? RowMajor : ColMajor, CoordAccess = true, RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index 10fac0cc5..199d2ce41 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -25,7 +25,6 @@ template struct traits > { typedef typename LhsXprType::Scalar Scalar; - typedef typename internal::packet_traits::type Packet; typedef typename traits::StorageKind StorageKind; typedef typename promote_index_type::Index, typename traits::Index>::type Index; @@ -62,10 +61,8 @@ class TensorAssignOp : public TensorBase { public: typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename LhsXprType::CoeffReturnType CoeffReturnType; - typedef typename LhsXprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -110,7 +107,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename 
XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index efca7cd79..b6e6db12a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -25,7 +25,6 @@ struct traits > : public traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -70,10 +69,8 @@ class TensorBroadcastingOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -144,7 +141,7 @@ struct TensorEvaluator, Device> } typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index a209e885b..ba8111316 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -26,7 +26,6 @@ struct traits > : public traits { typedef typename XprType::Scalar Scalar; typedef traits 
XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -80,10 +79,8 @@ class TensorChippingOp : public TensorBase > { public: typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -184,7 +181,7 @@ struct TensorEvaluator, Device> } typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index f57d2bb7d..122306e5c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -26,7 +26,6 @@ struct traits > // Type promotion to handle the case where the types of the lhs and the rhs are different. 
typedef typename promote_storage_type::ret Scalar; - typedef typename packet_traits::type Packet; typedef typename promote_storage_type::StorageKind, typename traits::StorageKind>::ret StorageKind; typedef typename promote_index_type::Index, @@ -60,14 +59,11 @@ class TensorConcatenationOp : public TensorBase::Scalar Scalar; - typedef typename internal::traits::Packet Packet; typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::Index Index; typedef typename internal::nested::type Nested; typedef typename internal::promote_storage_type::ret CoeffReturnType; - typedef typename internal::promote_storage_type::ret PacketReturnType; typedef typename NumTraits::Real RealScalar; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis) @@ -120,7 +116,7 @@ struct TensorEvaluator Dimensions; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; enum { IsAligned = false, PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 1adb68894..75bd23412 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -27,7 +27,6 @@ struct traits > // Type promotion to handle the case where the types of the lhs and the rhs are different. 
typedef typename internal::promote_storage_type::ret Scalar; - typedef typename internal::packet_traits::type Packet; typedef typename promote_storage_type::StorageKind, typename traits::StorageKind>::ret StorageKind; typedef typename promote_index_type::Index, @@ -76,11 +75,8 @@ class TensorContractionOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename internal::promote_storage_type::ret CoeffReturnType; - typedef typename internal::promote_storage_type::ret PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -118,10 +114,9 @@ struct TensorContractionEvaluatorBase typedef TensorContractionOp XprType; typedef typename internal::remove_const::type Scalar; - typedef typename XprType::Packet Packet; typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; enum { IsAligned = true, @@ -434,7 +429,7 @@ struct TensorContractionEvaluatorBase template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return internal::ploadt(m_result + index); + return internal::ploadt(m_result + index); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { return m_result; } @@ -478,10 +473,9 @@ struct TensorEvaluator XprType; typedef typename internal::remove_const::type Scalar; - typedef typename XprType::Packet Packet; typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; enum { Layout = TensorEvaluator::Layout, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h 
b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h index f5b539c7e..a4a06ab5f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -1213,10 +1213,9 @@ struct TensorEvaluator XprType; typedef typename internal::remove_const::type Scalar; - typedef typename XprType::Packet Packet; typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; enum { Layout = TensorEvaluator::Layout, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h index 392aa6d37..63f40b2b6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h @@ -230,8 +230,8 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper::type Packet; - typedef typename packet_traits::half HalfPacket; + typedef typename Tensor::PacketReturnType Packet; + typedef typename unpacket_traits::half HalfPacket; template EIGEN_DEVICE_FUNC diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index 51a3b9490..41bb704d5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -65,10 +65,9 @@ struct TensorEvaluator XprType; typedef typename internal::remove_const::type Scalar; - typedef typename XprType::Packet Packet; typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; enum { Layout = TensorEvaluator::Layout, diff --git 
a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index 4e87813a9..f2dee3ee8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -25,7 +25,6 @@ struct traits > { // Type promotion to handle the case where the types of the lhs and the rhs are different. typedef TargetType Scalar; - typedef typename packet_traits::type Packet; typedef typename traits::StorageKind StorageKind; typedef typename traits::Index Index; typedef typename XprType::Nested Nested; @@ -146,12 +145,10 @@ class TensorConversionOp : public TensorBase::Scalar Scalar; - typedef typename internal::traits::Packet Packet; typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::Index Index; typedef typename internal::nested::type Nested; typedef Scalar CoeffReturnType; - typedef Packet PacketReturnType; typedef typename NumTraits::Real RealScalar; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) @@ -190,8 +187,8 @@ struct TensorEvaluator, Device> typedef TargetType Scalar; typedef TargetType CoeffReturnType; typedef typename internal::remove_all::Scalar>::type SrcType; - typedef typename internal::traits::Packet PacketReturnType; - typedef typename internal::packet_traits::type PacketSourceType; + typedef typename PacketType::type PacketReturnType; + typedef typename PacketType::type PacketSourceType; enum { IsAligned = false, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 67c797802..4fe1fb943 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -221,7 +221,6 @@ struct traits > // Type promotion to handle the case where the types of the lhs and the rhs are different. 
typedef typename promote_storage_type::ret Scalar; - typedef typename packet_traits::type Packet; typedef typename promote_storage_type::StorageKind, typename traits::StorageKind>::ret StorageKind; typedef typename promote_index_type::Index, @@ -259,12 +258,9 @@ class TensorConvolutionOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename internal::promote_storage_type::ret CoeffReturnType; - typedef typename internal::promote_storage_type::ret PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -373,7 +369,7 @@ struct TensorEvaluator::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } @@ -775,7 +771,7 @@ struct TensorEvaluator::type PacketReturnType; typedef typename InputArgType::Scalar Scalar; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h index 0f8a98caf..b58e513b4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -24,7 +24,6 @@ template struct traits > { typedef typename XprType::Scalar Scalar; - typedef typename packet_traits::type Packet; typedef typename XprType::StorageKind StorageKind; typedef typename XprType::Index Index; typedef typename XprType::Nested Nested; @@ -54,10 +53,8 @@ class TensorCustomUnaryOp : public TensorBase::Scalar Scalar; - typedef typename internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename internal::nested::type 
Nested; typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::Index Index; @@ -105,7 +102,7 @@ struct TensorEvaluator, Devi } typedef typename internal::remove_const::type CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } @@ -167,11 +164,8 @@ struct traits > { typedef typename internal::promote_storage_type::ret Scalar; - typedef typename packet_traits::type Packet; typedef typename internal::promote_storage_type::ret CoeffReturnType; - typedef typename internal::promote_storage_type::ret PacketReturnType; typedef typename promote_storage_type::StorageKind, typename traits::StorageKind>::ret StorageKind; typedef typename promote_index_type::Index, @@ -205,10 +199,8 @@ class TensorCustomBinaryOp : public TensorBase::Scalar Scalar; - typedef typename internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename internal::traits::CoeffReturnType CoeffReturnType; - typedef typename internal::traits::PacketReturnType PacketReturnType; typedef typename internal::nested::type Nested; typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::Index Index; @@ -261,7 +253,7 @@ struct TensorEvaluator::type CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index bd83d5de8..5d73d62d2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -26,7 +26,6 @@ struct traits > // Type promotion to handle the case where the types of 
the lhs and the rhs are different. typedef typename XprType::Scalar Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -61,10 +60,8 @@ class TensorEvalToOp : public TensorBase > { public: typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename internal::remove_const::type CoeffReturnType; - typedef typename internal::remove_const::type PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -90,7 +87,6 @@ struct TensorEvaluator, Device> { typedef TensorEvalToOp XprType; typedef typename ArgType::Scalar Scalar; - typedef typename ArgType::Packet Packet; typedef typename TensorEvaluator::Dimensions Dimensions; enum { @@ -110,7 +106,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Index Index; typedef typename internal::remove_const::type CoeffReturnType; - typedef typename internal::remove_const::type PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } @@ -138,7 +134,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return internal::ploadt(m_buffer + index); + return internal::ploadt(m_buffer + index); } EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index f726585b1..d8afdcd1b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -29,9 +29,8 @@ struct TensorEvaluator { typedef typename Derived::Index Index; typedef typename Derived::Scalar Scalar; - typedef typename Derived::Packet Packet; typedef typename Derived::Scalar CoeffReturnType; - typedef typename Derived::Packet PacketReturnType; + typedef typename PacketType::type PacketReturnType; typedef typename Derived::Dimensions Dimensions; // NumDimensions is -1 for variable dim tensors @@ -40,7 +39,7 @@ struct TensorEvaluator enum { IsAligned = Derived::IsAligned, - PacketAccess = Derived::PacketAccess, + PacketAccess = (internal::unpacket_traits::size > 1), Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true @@ -75,13 +74,13 @@ struct TensorEvaluator template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return internal::ploadt(m_data + index); + return internal::ploadt(m_data + index); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const Packet& x) + void writePacket(Index index, const PacketReturnType& x) { - return internal::pstoret(m_data + index, x); + return internal::pstoret(m_data + index, x); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const { @@ -135,9 +134,8 @@ struct TensorEvaluator { typedef typename Derived::Index Index; typedef typename Derived::Scalar Scalar; - typedef typename Derived::Packet Packet; typedef typename Derived::Scalar CoeffReturnType; - typedef typename Derived::Packet PacketReturnType; + typedef typename PacketType::type PacketReturnType; typedef typename Derived::Dimensions Dimensions; // NumDimensions is -1 for variable dim tensors @@ -146,7 +144,7 @@ struct TensorEvaluator enum { IsAligned = Derived::IsAligned, - PacketAccess = Derived::PacketAccess, + PacketAccess = (internal::unpacket_traits::size > 1), Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true @@ -176,7 +174,7 @@ struct 
TensorEvaluator template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return internal::ploadt_ro(m_data + index); + return internal::ploadt_ro(m_data + index); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const { @@ -220,7 +218,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename internal::traits::Scalar CoeffReturnType; - typedef typename internal::traits::Packet PacketReturnType; + typedef typename PacketType::type PacketReturnType; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } @@ -271,7 +269,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename internal::traits::Scalar CoeffReturnType; - typedef typename internal::traits::Packet PacketReturnType; + typedef typename PacketType::type PacketReturnType; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } @@ -331,7 +329,7 @@ struct TensorEvaluator::Scalar CoeffReturnType; - typedef typename internal::traits::Packet PacketReturnType; + typedef typename PacketType::type PacketReturnType; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const @@ -399,7 +397,7 @@ struct TensorEvaluator typedef typename XprType::Index Index; typedef typename internal::traits::Scalar CoeffReturnType; - typedef typename internal::traits::Packet PacketReturnType; + typedef typename PacketType::type PacketReturnType; typedef typename TensorEvaluator::Dimensions Dimensions; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h index 
194c68929..49d849e23 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -32,7 +32,6 @@ template struct traits > : traits { - typedef typename XprType::Packet Packet; typedef traits XprTraits; typedef typename XprType::Scalar Scalar; typedef typename XprType::Nested XprTypeNested; @@ -54,10 +53,8 @@ class TensorCwiseNullaryOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef TensorCwiseNullaryOp Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -88,7 +85,6 @@ struct traits > // current Scalar/Packet to see if the intent is Input or Output. typedef typename result_of::type Scalar; typedef traits XprTraits; - typedef typename internal::packet_traits::type Packet; typedef typename XprType::Nested XprTypeNested; typedef typename remove_reference::type _XprTypeNested; static const int NumDimensions = XprTraits::NumDimensions; @@ -118,10 +114,8 @@ class TensorCwiseUnaryOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef Scalar CoeffReturnType; - typedef typename internal::packet_traits::type PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -155,7 +149,6 @@ struct traits > BinaryOp(typename LhsXprType::Scalar, typename RhsXprType::Scalar)>::type Scalar; typedef traits XprTraits; - typedef typename internal::packet_traits::type Packet; typedef typename promote_storage_type< typename traits::StorageKind, typename traits::StorageKind>::ret StorageKind; @@ 
-197,10 +190,8 @@ class TensorCwiseBinaryOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef Scalar CoeffReturnType; - typedef typename internal::packet_traits::type PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -234,7 +225,6 @@ struct traits > { typedef typename traits::Scalar Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename promote_storage_type::StorageKind, typename traits::StorageKind>::ret StorageKind; typedef typename promote_index_type::Index, @@ -266,12 +256,9 @@ class TensorSelectOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename internal::promote_storage_type::ret CoeffReturnType; - typedef typename internal::promote_storage_type::ret PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index 70282dd83..9c0ed43b7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -33,7 +33,6 @@ class TensorFixedSize : public TensorBase::StorageKind StorageKind; typedef typename internal::traits::Index Index; typedef Scalar_ Scalar; - typedef typename internal::packet_traits::type Packet; typedef typename NumTraits::Real RealScalar; typedef typename Base::CoeffReturnType CoeffReturnType; @@ -41,7 +40,6 @@ class TensorFixedSize : public TensorBase0), - PacketAccess = (internal::packet_traits::size > 1), Layout = Options_ & 
RowMajor ? RowMajor : ColMajor, CoordAccess = true, RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 58b864787..14f480901 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -26,7 +26,6 @@ struct traits > // Type promotion to handle the case where the types of the lhs and the rhs are different. typedef typename XprType::Scalar Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename traits::StorageKind StorageKind; typedef typename traits::Index Index; typedef typename XprType::Nested Nested; @@ -60,10 +59,8 @@ class TensorForcedEvalOp : public TensorBase > { public: typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename internal::remove_const::type CoeffReturnType; - typedef typename internal::remove_const::type PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -85,7 +82,6 @@ struct TensorEvaluator, Device> { typedef TensorForcedEvalOp XprType; typedef typename ArgType::Scalar Scalar; - typedef typename ArgType::Packet Packet; typedef typename TensorEvaluator::Dimensions Dimensions; enum { @@ -101,7 +97,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } @@ -133,7 +129,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - 
return internal::ploadt(m_buffer + index); + return internal::ploadt(m_buffer + index); } EIGEN_DEVICE_FUNC Scalar* data() const { return m_buffer; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h index 96f74b992..4c11bca07 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -25,7 +25,6 @@ struct traits > : public traits { typedef typename XprType::Scalar Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -55,10 +54,8 @@ class TensorGeneratorOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -118,7 +115,7 @@ struct TensorEvaluator, Device> } typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index bc6021c9e..0008f9890 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -32,7 +32,6 @@ struct traits > : public traits { typedef typename internal::remove_const::type Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; 
typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -60,10 +59,8 @@ class TensorImagePatchOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -311,7 +308,7 @@ struct TensorEvaluator, Device> } typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h index 2798956ae..368e6f685 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h @@ -25,7 +25,6 @@ struct traits > : public traits { typedef typename XprType::Scalar Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -53,10 +52,8 @@ class TensorInflationOp : public TensorBase, { public: typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename 
Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -128,7 +125,8 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h index a37516974..c5e29fe74 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -40,7 +40,6 @@ struct traits > : public traits { typedef typename XprType::Scalar Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -70,10 +69,8 @@ class TensorLayoutSwapOp : public TensorBase, WriteA { public: typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename internal::remove_const::type CoeffReturnType; - typedef typename internal::remove_const::type PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -136,7 +133,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } diff --git 
a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 4a199cdd8..9ebd9172b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -28,7 +28,6 @@ template class TensorMap : public Tensor typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; - typedef typename internal::packet_traits::type Packet; typedef typename NumTraits::Real RealScalar; typedef typename Base::CoeffReturnType CoeffReturnType; @@ -47,7 +46,6 @@ template class TensorMap : public Tensor enum { IsAligned = ((int(Options_)&Aligned)==Aligned), - PacketAccess = (internal::packet_traits::size > 1), Layout = PlainObjectType::Layout, CoordAccess = true, RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index e867e450e..afde7b3d2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -25,7 +25,6 @@ struct traits > : public traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -55,10 +54,8 @@ class TensorReshapingOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename internal::remove_const::type CoeffReturnType; - typedef typename internal::remove_const::type PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -124,7 +121,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef 
typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } @@ -181,7 +178,7 @@ template typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) { @@ -208,7 +205,6 @@ struct traits > : public traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -238,10 +234,8 @@ class TensorSlicingOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -361,7 +355,7 @@ struct TensorEvaluator, Devi typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; typedef Sizes Dimensions; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } @@ -549,7 +543,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef 
typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; typedef Sizes Dimensions; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index eaaf4dc86..a595a0175 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -25,7 +25,6 @@ struct traits > : public traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -55,10 +54,8 @@ class TensorPaddingOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -134,7 +131,7 @@ struct TensorEvaluator, Device typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h index 57b716fd6..0bf460f4e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h @@ -25,7 +25,6 @@ struct traits > : public traits { typedef typename XprType::Scalar Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; 
typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -55,10 +54,8 @@ class TensorPatchOp : public TensorBase, ReadOn { public: typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -141,7 +138,7 @@ struct TensorEvaluator, Device> } typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index d01a63ccb..4f2801e53 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -26,7 +26,6 @@ struct traits > { typedef traits XprTraits; typedef typename XprTraits::Scalar Scalar; - typedef typename internal::packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -381,10 +380,8 @@ template class TensorReductionOp : public TensorBase, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename internal::remove_const::type CoeffReturnType; - typedef typename internal::remove_const::type PacketReturnType; typedef typename 
Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -509,7 +506,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } typedef typename internal::remove_const::type CoeffReturnType; - typedef typename internal::remove_const::type PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool evalSubExprsIfNeeded(CoeffReturnType* data) { m_impl.evalSubExprsIfNeeded(NULL); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h index 57197d060..bc92d9e6d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h @@ -125,7 +125,6 @@ template class TensorRef : public TensorBase::StorageKind StorageKind; typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; - typedef typename internal::packet_traits::type Packet; typedef typename NumTraits::Real RealScalar; typedef typename Base::CoeffReturnType CoeffReturnType; typedef Scalar* PointerType; @@ -358,9 +357,8 @@ struct TensorEvaluator, Device> { typedef typename Derived::Index Index; typedef typename Derived::Scalar Scalar; - typedef typename Derived::Packet Packet; typedef typename Derived::Scalar CoeffReturnType; - typedef typename Derived::Packet PacketReturnType; + typedef typename PacketType::type PacketReturnType; typedef typename Derived::Dimensions Dimensions; enum { @@ -404,9 +402,8 @@ struct TensorEvaluator, Device> : public TensorEvaluator::type PacketReturnType; typedef typename Derived::Dimensions Dimensions; typedef TensorEvaluator, Device> Base; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index 846f81e0f..96d92038c 100644 --- 
a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -25,7 +25,6 @@ struct traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -55,10 +54,8 @@ class TensorReverseOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; @@ -140,7 +137,7 @@ struct TensorEvaluator, Device typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } @@ -248,7 +245,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return this->m_dimensions; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index c4adb7d4c..c19833ea5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -25,7 +25,6 @@ struct traits > : public traits { typedef typename XprType::Scalar Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind 
StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -55,10 +54,8 @@ class TensorShufflingOp : public TensorBase { public: typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -149,7 +146,7 @@ struct TensorEvaluator, Device> } typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } @@ -234,7 +231,7 @@ struct TensorEvaluator, Device> { } typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h index 2c2eb6515..085f8fd3d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h @@ -25,7 +25,6 @@ struct traits > : public traits { typedef typename XprType::Scalar Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -55,10 +54,8 @@ class TensorStridingOp : public TensorBase > { public: typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename 
Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -147,7 +144,7 @@ struct TensorEvaluator, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } @@ -267,7 +264,8 @@ struct TensorEvaluator, Device> typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h index 04f4f8ffc..5bdfbad46 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h @@ -27,7 +27,6 @@ struct traits > : public traits { typedef typename internal::remove_const::type Scalar; typedef traits XprTraits; - typedef typename packet_traits::type Packet; typedef typename XprTraits::StorageKind StorageKind; typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; @@ -55,10 +54,8 @@ class TensorVolumePatchOp : public TensorBase::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef 
typename XprType::PacketReturnType PacketReturnType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; @@ -340,7 +337,7 @@ struct TensorEvaluator, D } typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } From 8768c063f5607f27b899102abf472815981cf788 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 8 Mar 2016 12:26:49 -0800 Subject: [PATCH 3/8] Fixed the tensor chipping code. --- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 2 -- unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 10 +++++----- .../Eigen/CXX11/src/Tensor/TensorContractionMapper.h | 8 ++++---- .../CXX11/src/Tensor/TensorContractionThreadPool.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 2 +- 6 files changed, 13 insertions(+), 15 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 66772a3ad..c854afd2f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -31,7 +31,6 @@ class TensorBase typedef typename DerivedTraits::Scalar Scalar; typedef typename DerivedTraits::Index Index; typedef typename internal::remove_const::type CoeffReturnType; - typedef typename internal::packet_traits::type PacketReturnType; static const int NumDimensions = DerivedTraits::NumDimensions; // Generic nullary operation support. 
@@ -706,7 +705,6 @@ class TensorBase : public TensorBase::type PacketReturnType; static const int NumDimensions = DerivedTraits::NumDimensions; template friend class Tensor; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index ba8111316..5023371ae 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -310,7 +310,7 @@ struct TensorEvaluator, Device> { } typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 75bd23412..18b20b2dc 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -120,7 +120,7 @@ struct TensorContractionEvaluatorBase enum { IsAligned = true, - PacketAccess = (internal::packet_traits::size > 1), + PacketAccess = (internal::unpacket_traits::size > 1), Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = true @@ -381,8 +381,8 @@ struct TensorContractionEvaluatorBase typedef typename internal::remove_const::type RhsScalar; typedef TensorEvaluator LeftEvaluator; typedef TensorEvaluator RightEvaluator; - const Index lhs_packet_size = internal::packet_traits::size; - const Index rhs_packet_size = internal::packet_traits::size; + const Index lhs_packet_size = internal::unpacket_traits::size; + const Index rhs_packet_size = internal::unpacket_traits::size; const int lhs_alignment = LeftEvaluator::IsAligned ? Aligned : Unaligned; const int rhs_alignment = RightEvaluator::IsAligned ? 
Aligned : Unaligned; typedef internal::TensorContractionInputMapper LeftEvaluator; typedef TensorEvaluator RightEvaluator; - const Index lhs_packet_size = internal::packet_traits::size; - const Index rhs_packet_size = internal::packet_traits::size; + const Index lhs_packet_size = internal::unpacket_traits::size; + const Index rhs_packet_size = internal::unpacket_traits::size; typedef internal::TensorContractionInputMapper::type Packet; + typedef typename Tensor::PacketReturnType Packet; template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { EIGEN_ALIGN_MAX Scalar data[1]; data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); - return pload::type>(data); + return pload(data); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const { @@ -334,8 +334,8 @@ template class TensorContractionSubMapper { public: - typedef typename packet_traits::type Packet; - typedef typename packet_traits::half HalfPacket; + typedef typename Tensor::PacketReturnType Packet; + typedef typename unpacket_traits::half HalfPacket; typedef BaseTensorContractionMapper ParentMapper; typedef TensorContractionSubMapper Self; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index 41bb704d5..02b3c6dea 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -135,8 +135,8 @@ struct TensorEvaluatorm_device.memset(buffer, 0, m * n * sizeof(Scalar)); - const int lhs_packet_size = internal::packet_traits::size; - const int rhs_packet_size = internal::packet_traits::size; + const int lhs_packet_size = internal::unpacket_traits::size; + const int rhs_packet_size = internal::unpacket_traits::size; typedef internal::TensorContractionInputMapper, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - 
return m_functor.template packetOp(index); + return m_functor.template packetOp(index); } EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } From 551ff11d0d1ad8025de77166ea2ec86874cb717d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 8 Mar 2016 12:28:10 -0800 Subject: [PATCH 4/8] Fixed the tensor layout swapping code --- unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h index c5e29fe74..9b85914ff 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -187,7 +187,7 @@ template typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename PacketType::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) { From a81b88bef7d539e4050358d4c0e17c61c6ed3141 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 8 Mar 2016 12:30:19 -0800 Subject: [PATCH 5/8] Fixed the tensor concatenation code --- unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index 122306e5c..7738f18fb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -296,7 +296,7 @@ template::type PacketReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) { From 5a427a94a9c04f5cc32c185c9eebe10e40956d5e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 8 Mar 2016 13:28:06 -0800 Subject: [PATCH 6/8] Fixed the tensor generator code 
--- unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h index 4c11bca07..e4154bd0b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -85,10 +85,11 @@ struct TensorEvaluator, Device> typedef typename TensorEvaluator::Dimensions Dimensions; static const int NumDims = internal::array_size::value; typedef typename XprType::Scalar Scalar; - + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; enum { IsAligned = false, - PacketAccess = (internal::packet_traits::size > 1), + PacketAccess = (internal::unpacket_traits::size > 1), BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented @@ -114,9 +115,6 @@ struct TensorEvaluator, Device> } } - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType::type PacketReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { From 6d6413f76832a094d0835770af2adfaabba24738 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 8 Mar 2016 16:02:00 -0800 Subject: [PATCH 7/8] Simplified the full reduction code --- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 139 +++++++++--------- 1 file changed, 69 insertions(+), 70 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 4f2801e53..875155243 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -221,121 +221,120 @@ struct FullReducer { #ifdef EIGEN_USE_THREADS // Multithreaded full reducers -template 
+template struct FullReducerShard { - static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) { - - shard->saccum = reducer.initialize(); - for (typename Eval::Index j = 0; j < numValuesToReduce; ++j) { - reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum); - } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Self& self, typename Self::Index firstIndex, + typename Self::Index numValuesToReduce, Op& reducer, + typename Self::CoeffReturnType* output) { + *output = InnerMostDimReducer::reduce( + self, firstIndex, numValuesToReduce, reducer); } - - typename Eval::CoeffReturnType saccum; }; -template -struct FullReducerShard { - static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) { - - const int packetSize = internal::unpacket_traits::size; - const typename Eval::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize; - - shard->paccum = reducer.template initializePacket(); - for (typename Eval::Index j = 0; j < VectorizedSize; j += packetSize) { - reducer.reducePacket(eval.m_impl.template packet(firstIndex + j), &shard->paccum); - } - shard->saccum = reducer.initialize(); - for (typename Eval::Index j = VectorizedSize; j < numValuesToReduce; ++j) { - reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum); - } - } - - typename Eval::PacketReturnType paccum; - typename Eval::CoeffReturnType saccum; -}; - - template struct FullReducer { static const bool HasOptimizedImplementation = !Op::IsStateful; + static const int PacketSize = + unpacket_traits::size; // launch one reducer per thread and accumulate the result. 
- static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) { + static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, + typename Self::CoeffReturnType* output) { typedef typename Self::Index Index; const Index num_coeffs = array_prod(self.m_impl.dimensions()); - const Index blocksize = std::floor(static_cast(num_coeffs)/device.numThreads()); - const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; - eigen_assert(num_coeffs >= numblocks * blocksize); - - std::vector results; - results.reserve(numblocks); - std::vector > shards; - shards.resize(numblocks); - for (Index i = 0; i < numblocks; ++i) { - results.push_back(device.enqueue(&FullReducerShard::run, self, i*blocksize, blocksize, reducer, &shards[i])); + if (num_coeffs == 0) { + *output = reducer.finalize(reducer.initialize()); + return; } - - FullReducerShard finalShard; - if (numblocks * blocksize < num_coeffs) { - FullReducerShard::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard); + const int num_threads = device.numThreads(); + if (num_threads == 1) { + *output = InnerMostDimReducer::reduce(self, 0, num_coeffs, reducer); + return; } else { - finalShard.saccum = reducer.initialize(); - } + const Index blocksize = std::floor(static_cast(num_coeffs) / num_threads); + const Index numblocks = blocksize > 0 ? 
num_coeffs / blocksize : 0; + eigen_assert(num_coeffs >= numblocks * blocksize); - for (Index i = 0; i < numblocks; ++i) { - wait_until_ready(results[i]); - delete results[i]; - } + std::vector results; + results.reserve(numblocks); + std::vector shards(numblocks, reducer.initialize()); + for (Index i = 0; i < numblocks; ++i) { + results.push_back( + device.enqueue(&FullReducerShard::run, self, + i * blocksize, blocksize, reducer, &shards[i])); + } - for (Index i = 0; i < numblocks; ++i) { - reducer.reduce(shards[i].saccum, &finalShard.saccum); + typename Self::CoeffReturnType finalShard; + if (numblocks * blocksize < num_coeffs) { + finalShard = InnerMostDimReducer::reduce( + self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer); + } else { + finalShard = reducer.initialize(); + } + for (Index i = 0; i < numblocks; ++i) { + wait_until_ready(results[i]); + delete results[i]; + } + for (Index i = 0; i < numblocks; ++i) { + reducer.reduce(shards[i], &finalShard); + } + *output = reducer.finalize(finalShard); } - *output = reducer.finalize(finalShard.saccum); } }; template struct FullReducer { static const bool HasOptimizedImplementation = !Op::IsStateful; + static const int PacketSize = + unpacket_traits::size; // launch one reducer per thread and accumulate the result. 
- static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) { + static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, + typename Self::CoeffReturnType* output) { typedef typename Self::Index Index; const Index num_coeffs = array_prod(self.m_impl.dimensions()); - const Index blocksize = std::floor(static_cast(num_coeffs)/device.numThreads()); + if (num_coeffs == 0) { + *output = reducer.finalize(reducer.initialize()); + return; + } + const int num_threads = device.numThreads(); + if (num_threads == 1) { + *output = InnerMostDimReducer::reduce(self, 0, num_coeffs, reducer); + return; + } + const Index blocksize = std::floor(static_cast(num_coeffs) / num_threads); const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; eigen_assert(num_coeffs >= numblocks * blocksize); std::vector results; results.reserve(numblocks); - std::vector > shards; - shards.resize(numblocks); + std::vector shards(numblocks, reducer.initialize()); for (Index i = 0; i < numblocks; ++i) { - results.push_back(device.enqueue(&FullReducerShard::run, self, i*blocksize, blocksize, reducer, &shards[i])); + results.push_back(device.enqueue(&FullReducerShard::run, + self, i * blocksize, blocksize, reducer, + &shards[i])); } - - FullReducerShard finalShard; + typename Self::CoeffReturnType finalShard; if (numblocks * blocksize < num_coeffs) { - FullReducerShard::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard); + finalShard = InnerMostDimReducer::reduce( + self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer); } else { - finalShard.paccum = reducer.template initializePacket(); - finalShard.saccum = reducer.initialize(); + finalShard = reducer.initialize(); } for (Index i = 0; i < numblocks; ++i) { wait_until_ready(results[i]); delete results[i]; } - for (Index i = 0; i < numblocks; ++i) { - reducer.reducePacket(shards[i].paccum, 
&finalShard.paccum); - reducer.reduce(shards[i].saccum, &finalShard.saccum); + reducer.reduce(shards[i], &finalShard); } - - *output = reducer.finalizeBoth(finalShard.saccum, finalShard.paccum); + *output = reducer.finalize(finalShard); } }; + #endif From 46177c8d648a27d82d34cebed7e2b5bc59d441fc Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 8 Mar 2016 16:37:27 -0800 Subject: [PATCH 8/8] Replace std::vector with our own implementation, as using the stl when compiling with nvcc and avx enabled leads to many issues. --- unsupported/Eigen/CXX11/Core | 1 + .../Eigen/CXX11/src/Core/util/MaxSizeVector.h | 130 ++++++++++++++++++ .../src/Tensor/TensorContractionThreadPool.h | 16 +-- .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 3 +- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 10 +- 5 files changed, 143 insertions(+), 17 deletions(-) create mode 100644 unsupported/Eigen/CXX11/src/Core/util/MaxSizeVector.h diff --git a/unsupported/Eigen/CXX11/Core b/unsupported/Eigen/CXX11/Core index e3e2cb60c..946145f5a 100644 --- a/unsupported/Eigen/CXX11/Core +++ b/unsupported/Eigen/CXX11/Core @@ -33,6 +33,7 @@ #include #include "src/Core/util/EmulateArray.h" +#include "src/Core/util/MaxSizeVector.h" // Emulate the cxx11 functionality that we need if the compiler doesn't support it. // Visual studio 2015 doesn't advertise itself as cxx11 compliant, although it diff --git a/unsupported/Eigen/CXX11/src/Core/util/MaxSizeVector.h b/unsupported/Eigen/CXX11/src/Core/util/MaxSizeVector.h new file mode 100644 index 000000000..551124bae --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Core/util/MaxSizeVector.h @@ -0,0 +1,130 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_FIXEDSIZEVECTOR_H +#define EIGEN_FIXEDSIZEVECTOR_H + +namespace Eigen { + +/** \class MaxSizeVector + * \ingroup Core + * + * \brief The MaxSizeVector class. + * + * The %MaxSizeVector provides a subset of std::vector functionality. + * + * The goal is to provide basic std::vector operations when using + * std::vector is not an option (e.g. on GPU or when compiling using + * FMA/AVX, as this can cause either compilation failures or illegal + * instruction failures). + * + * Beware: The constructors are not API compatible with those of + * std::vector. + */ +template +class MaxSizeVector { + public: + // Construct a new MaxSizeVector, reserve n elements. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit MaxSizeVector(size_t n) + : reserve_(n), size_(0), + data_(static_cast(internal::aligned_malloc(n * sizeof(T)))) { + for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; } + } + + // Construct a new MaxSizeVector, reserve and resize to n. + // Copy the init value to all elements. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit MaxSizeVector(size_t n, const T& init) + : reserve_(n), size_(n), + data_(static_cast(internal::aligned_malloc(n * sizeof(T)))) { + for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ~MaxSizeVector() { + for (size_t i = 0; i < size_; ++i) { + data_[i].~T(); + } + internal::aligned_free(data_); + } + + // Append new elements (up to reserved size). 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void push_back(const T& t) { + eigen_assert(size_ < reserve_); + data_[size_++] = t; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const T& operator[] (size_t i) const { + eigen_assert(i < size_); + return data_[i]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T& operator[] (size_t i) { + eigen_assert(i < size_); + return data_[i]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T& back() { + eigen_assert(size_ > 0); + return data_[size_ - 1]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const T& back() const { + eigen_assert(size_ > 0); + return data_[size_ - 1]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void pop_back() { + // NOTE: Unlike std::vector's pop_back(), this does not destroy the + // value at the end; ~MaxSizeVector() only destroys the first size_ + // elements, so a popped value is never destroyed. + eigen_assert(size_ > 0); + size_--; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t size() const { return size_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + bool empty() const { return size_ == 0; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T* data() { return data_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const T* data() const { return data_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T* begin() { return data_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T* end() { return data_ + size_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const T* begin() const { return data_; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const T* end() const { return data_ + size_; } + + private: + size_t reserve_; + size_t size_; + T* data_; +}; + +} // namespace Eigen + +#endif // EIGEN_FIXEDSIZEVECTOR_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index 02b3c6dea..9044454fd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -28,7 +28,7 
@@ struct packLhsArg { template struct packRhsAndKernelArg { - const std::vector* blockAs; + const MaxSizeVector* blockAs; RhsScalar* blockB; const RhsMapper& rhs; OutputMapper& output; @@ -46,8 +46,8 @@ struct packRhsAndKernelArg { const Index n_block_idx; const Index m_blocks; const Index n_blocks; - std::vector* kernel_notifications; - const std::vector* lhs_notifications; + MaxSizeVector* kernel_notifications; + const MaxSizeVector* lhs_notifications; const bool need_to_pack; }; @@ -202,8 +202,7 @@ struct TensorEvaluator blockAs; - blockAs.reserve(num_threads); + MaxSizeVector blockAs(num_threads); for (int i = 0; i < num_threads; i++) { blockAs.push_back(static_cast(this->m_device.allocate(sizeA * sizeof(LhsScalar)))); } @@ -212,18 +211,17 @@ struct TensorEvaluator blockBs; - blockBs.reserve(n_blocks); + MaxSizeVector blockBs(n_blocks); for (int i = 0; i < n_blocks; i++) { blockBs.push_back(static_cast(this->m_device.allocate(sizeB * sizeof(RhsScalar)))); } // lhs_notifications starts with all null Notifications - std::vector lhs_notifications(num_threads, nullptr); + MaxSizeVector lhs_notifications(num_threads, nullptr); // this should really be numBlockAs * n_blocks; const Index num_kernel_notifications = num_threads * n_blocks; - std::vector kernel_notifications(num_kernel_notifications, + MaxSizeVector kernel_notifications(num_kernel_notifications, nullptr); for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index fd9919829..54da77bcf 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -127,8 +127,7 @@ class TensorExecutor const Index blocksize = numext::maxi(PacketSize, (blocksz - (blocksz % PacketSize))); const Index numblocks = size / blocksize; - std::vector results; - results.reserve(numblocks); + MaxSizeVector results(numblocks); for (int i = 
0; i < numblocks; ++i) { results.push_back(device.enqueue(&EvalRange::run, evaluator, i*blocksize, (i+1)*blocksize)); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 875155243..2d7fb80d4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -256,9 +256,8 @@ struct FullReducer { const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; eigen_assert(num_coeffs >= numblocks * blocksize); - std::vector results; - results.reserve(numblocks); - std::vector shards(numblocks, reducer.initialize()); + MaxSizeVector results(numblocks); + MaxSizeVector shards(numblocks, reducer.initialize()); for (Index i = 0; i < numblocks; ++i) { results.push_back( device.enqueue(&FullReducerShard::run, self, @@ -308,9 +307,8 @@ struct FullReducer { const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; eigen_assert(num_coeffs >= numblocks * blocksize); - std::vector results; - results.reserve(numblocks); - std::vector shards(numblocks, reducer.initialize()); + MaxSizeVector results(numblocks); + MaxSizeVector shards(numblocks, reducer.initialize()); for (Index i = 0; i < numblocks; ++i) { results.push_back(device.enqueue(&FullReducerShard::run, self, i * blocksize, blocksize, reducer,