diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index f589555cc..574f945d5 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -29,10 +29,13 @@ struct copy_using_evaluator_traits { typedef typename DstEvaluator::XprType Dst; typedef typename Dst::Scalar DstScalar; + // TODO recursively find best packet size + typedef typename packet_traits::type DstPacket; + enum { DstFlags = DstEvaluator::Flags, SrcFlags = SrcEvaluator::Flags, - RequiredAlignment = packet_traits::size*sizeof(DstScalar) // FIXME ask packet_traits for the true alignment requirement + RequiredAlignment = unpacket_traits::alignment }; public: diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 910d04ecb..a599a6a69 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -638,6 +638,8 @@ struct evaluator > { typedef Map XprType; typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; enum { InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 @@ -651,21 +653,18 @@ struct evaluator > HasNoStride = HasNoInnerStride && HasNoOuterStride, IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - // TODO: should check for smaller packet types once we can handle multi-sized packet types - AlignBytes = int(packet_traits::size) * sizeof(Scalar), + PacketAlignment = unpacket_traits::alignment, KeepsPacketAccess = bool(HasNoInnerStride) && ( bool(IsDynamicSize) || HasNoOuterStride || ( OuterStrideAtCompileTime!=Dynamic - && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ), + && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ), Flags0 = evaluator::Flags, - //Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) ? int(Flags0) : int(Flags0 & ~LinearAccessBit), Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit), - //IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&int(AlignedMask))>0), Alignment = int(MapOptions)&int(AlignedMask) }; @@ -702,7 +701,9 @@ struct evaluator > : block_evaluator { typedef Block XprType; - typedef typename XprType::Scalar Scalar; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; enum { CoeffReadCost = evaluator::CoeffReadCost, @@ -735,9 +736,8 @@ struct evaluator > MaskPacketAccessBit), Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, - // TODO: should check for smaller packet types once we can handle multi-sized packet types - AlignBytes = int(packet_traits::size) * sizeof(Scalar), - Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignBytes : 0, + PacketAlignment = unpacket_traits::alignment, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) }; typedef block_evaluator block_evaluator_type; diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index d053911e3..d7ccb3b6f 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -614,7 +614,8 @@ template static inline Index first_default_aligned(const DenseBase& m) { typedef typename Derived::Scalar Scalar; - return first_aligned::size*sizeof(Scalar)>(m); + typedef typename packet_traits::type DefaultPacketType; + return first_aligned::alignment>(m); } template::ret> diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 4cc5f656f..c656f61bf 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -454,7 +454,7 @@ pmadd(const Packet& a, template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits::type* from) { - if(Alignment >= unpacket_traits::size*sizeof(typename unpacket_traits::type)) + if(Alignment >= unpacket_traits::alignment) return pload(from); else return ploadu(from); @@ -465,7 +465,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_trai template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) { - if(Alignment >= unpacket_traits::size*sizeof(typename unpacket_traits::type)) + if(Alignment >= unpacket_traits::alignment) pstore(to, from); else pstoreu(to, from); diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index e67fff6c5..b7c90ae16 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -140,13 +140,15 @@ template > { private: + // TODO find ideal packet size + typedef typename packet_traits<_Scalar>::type PacketScalar; enum { row_major_bit = _Options&RowMajor ? RowMajorBit : 0, is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, default_alignment = compute_default_alignment<_Scalar,max_size>::value, actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, - required_alignment = packet_traits<_Scalar>::size * sizeof(_Scalar), // FIXME ask packet_traits for the true required alignment + required_alignment = unpacket_traits::alignment, packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0 }; diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 1f5d7addd..b24d54ef9 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -433,8 +433,8 @@ struct product_evaluator, ProductTag, DenseShape, LhsAlignment = LhsEtorType::Alignment, RhsAlignment = RhsEtorType::Alignment, - LhsIsAligned = int(LhsAlignment) >= int(sizeof(Scalar)*PacketSize), // FIXME compare to required alignment - RhsIsAligned = int(RhsAlignment) >= int(sizeof(Scalar)*PacketSize), + LhsIsAligned = int(LhsAlignment) >= int(unpacket_traits::alignment), + RhsIsAligned = int(RhsAlignment) >= int(unpacket_traits::alignment), LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index fa308b53e..942f51d51 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -221,10 +221,10 @@ struct redux_impl { const Index size = mat.size(); - const Index packetSize = packet_traits::size; - const int packetBytes = int(packetSize*sizeof(Scalar)); + const Index packetSize = packet_traits::size; + const int packetAlignment = unpacket_traits::alignment; enum { - alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) ? int(packetBytes) : int(Unaligned), // FIXME take into account alignment requirement + alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned), alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment) }; const Index alignedStart = internal::first_default_aligned(mat.nestedExpression()); diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h index 003a1fc3c..b16e0ddd4 100644 --- a/Eigen/src/Core/arch/AVX/Complex.h +++ b/Eigen/src/Core/arch/AVX/Complex.h @@ -45,7 +45,7 @@ template<> struct packet_traits > : default_packet_traits }; }; -template<> struct unpacket_traits { typedef std::complex type; enum {size=4}; typedef Packet2cf half; }; +template<> struct unpacket_traits { typedef std::complex type; enum {size=4, alignment=Aligned32}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet4cf padd(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet4cf psub(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); } @@ -267,7 +267,7 @@ template<> struct packet_traits > : default_packet_traits }; }; -template<> struct unpacket_traits { typedef std::complex type; enum {size=2}; typedef Packet1cd half; }; +template<> struct unpacket_traits { typedef std::complex type; enum {size=2, alignment=Aligned32}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet2cd padd(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cd psub(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); } diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index a2306fd1a..4dda35b41 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -100,9 +100,9 @@ template<> struct packet_traits : default_packet_traits }; */ -template<> struct unpacket_traits { typedef float type; typedef Packet4f half; enum {size=8}; }; -template<> struct unpacket_traits { typedef double type; typedef Packet2d half; enum {size=4}; }; -template<> struct unpacket_traits { typedef int type; typedef Packet4i half; enum {size=8}; }; +template<> struct unpacket_traits { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; }; +template<> struct unpacket_traits { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; }; +template<> struct unpacket_traits { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; }; template<> EIGEN_STRONG_INLINE Packet8f pset1(const float& from) { return _mm256_set1_ps(from); } template<> EIGEN_STRONG_INLINE Packet4d pset1(const double& from) { return _mm256_set1_pd(from); } diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 1f00e27d8..58c296171 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -53,7 +53,7 @@ template<> struct packet_traits > : default_packet_traits }; }; -template<> struct unpacket_traits { typedef std::complex type; enum {size=2}; typedef Packet2cf half; }; +template<> struct unpacket_traits { typedef std::complex type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) { @@ -275,7 +275,7 @@ template<> struct packet_traits > : default_packet_traits }; }; -template<> struct unpacket_traits { typedef std::complex type; enum {size=1}; typedef Packet1cd half; }; +template<> struct unpacket_traits { typedef std::complex type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet1cd pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu((const double*)from)); } diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index d647427ce..7ab777009 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -148,8 +148,8 @@ template<> struct packet_traits : default_packet_traits }; -template<> struct unpacket_traits { typedef float type; enum {size=4}; typedef Packet4f half; }; -template<> struct unpacket_traits { typedef int type; enum {size=4}; typedef Packet4i half; }; +template<> struct unpacket_traits { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; +template<> struct unpacket_traits { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v) { @@ -756,7 +756,7 @@ template<> struct packet_traits : default_packet_traits }; }; -template<> struct unpacket_traits { typedef double type; enum {size=2}; typedef Packet2d half; }; +template<> struct unpacket_traits { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; inline std::ostream & operator <<(std::ostream & s, const Packet2d & v) diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index ceed1d1ef..8b767a4e7 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -65,8 +65,8 @@ template<> struct packet_traits : default_packet_traits }; -template<> struct unpacket_traits { typedef float type; enum {size=4}; typedef float4 half; }; -template<> struct unpacket_traits { typedef double type; enum {size=2}; typedef double2 half; }; +template<> struct unpacket_traits { typedef float type; enum {size=4, alignment=Aligned16}; typedef float4 half; }; +template<> struct unpacket_traits { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; }; template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1(const float& from) { return make_float4(from, from, from, from); diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 60ab56a47..d2322b307 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -48,7 +48,7 @@ template<> struct packet_traits > : default_packet_traits }; }; -template<> struct unpacket_traits { typedef std::complex type; enum {size=2}; typedef Packet2cf half; }; +template<> struct unpacket_traits { typedef std::complex type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) { @@ -306,7 +306,7 @@ template<> struct packet_traits > : default_packet_traits }; }; -template<> struct unpacket_traits { typedef std::complex type; enum {size=1}; typedef Packet1cd half; }; +template<> struct unpacket_traits { typedef std::complex type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet1cd pload(const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload((const double*)from)); } template<> EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu((const double*)from)); } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index cda9056a4..a72f91f21 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -114,8 +114,8 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); } #endif -template<> struct unpacket_traits { typedef float type; enum {size=4}; typedef Packet4f half; }; -template<> struct unpacket_traits { typedef int type; enum {size=4}; typedef Packet4i half; }; +template<> struct unpacket_traits { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; +template<> struct unpacket_traits { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return vdupq_n_f32(from); } template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return vdupq_n_s32(from); } @@ -570,7 +570,7 @@ template<> struct packet_traits : default_packet_traits }; }; -template<> struct unpacket_traits { typedef double type; enum {size=2}; typedef Packet2d half; }; +template<> struct unpacket_traits { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return vdupq_n_f64(from); } diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 3713b73da..2a44b6272 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -50,7 +50,7 @@ template<> struct packet_traits > : default_packet_traits }; #endif -template<> struct unpacket_traits { typedef std::complex type; enum {size=2}; typedef Packet2cf half; }; +template<> struct unpacket_traits { typedef std::complex type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; }; template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); } @@ -297,7 +297,7 @@ template<> struct packet_traits > : default_packet_traits }; #endif -template<> struct unpacket_traits { typedef std::complex type; enum {size=1}; typedef Packet1cd half; }; +template<> struct unpacket_traits { typedef std::complex type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; }; template<> EIGEN_STRONG_INLINE Packet1cd padd(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet1cd psub(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 38a84273d..d5e852799 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -144,9 +144,9 @@ template<> struct packet_traits : default_packet_traits }; }; -template<> struct unpacket_traits { typedef float type; enum {size=4}; typedef Packet4f half; }; -template<> struct unpacket_traits { typedef double type; enum {size=2}; typedef Packet2d half; }; -template<> struct unpacket_traits { typedef int type; enum {size=4}; typedef Packet4i half; }; +template<> struct unpacket_traits { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; +template<> struct unpacket_traits { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; +template<> struct unpacket_traits { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; #if EIGEN_COMP_MSVC==1500 // Workaround MSVC 9 internal compiler error. diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 957c36bcf..69a489d43 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -553,7 +553,8 @@ inline Index first_aligned(const Scalar* array, Index size) template inline Index first_default_aligned(const Scalar* array, Index size) { - return first_aligned::size*sizeof(Scalar)>(array, size); + typedef typename packet_traits::type DefaultPacketType; + return first_aligned::alignment>(array, size); } /** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 81e992392..2808dfd6c 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -119,7 +119,11 @@ template struct unpacket_traits { typedef T type; typedef T half; - enum {size=1}; + enum + { + size = 1, + alignment = 1 + }; }; #if EIGEN_MAX_STATIC_ALIGN_BYTES>0