From 1f5024332e47f295c991c3781d57d0466d41a9c8 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 6 Aug 2015 15:31:07 +0200 Subject: [PATCH] First part of a big refactoring of alignment control to enable the handling of arbitrarily aligned buffers. It includes: - AlignedBit flag is deprecated. Alignment is now specified by the evaluator through the 'Alignment' enum, e.g., evaluator<Xpr>::Alignment. Its value is in Bytes. - Add several enums to specify alignment: Aligned8, Aligned16, Aligned32, Aligned64, Aligned128. AlignedMax corresponds to EIGEN_MAX_ALIGN_BYTES. Such enums are used to define the above Alignment value, and as the 'Options' template parameter of Map<> and Ref<>. - The Aligned enum is now deprecated. It is now an alias for Aligned16. - Currently, traits<Matrix<> >, traits<Array<> >, traits<Map<> >, traits<Ref<> >, and traits<Block<> > also expose the Alignment enum. --- Eigen/src/Core/AssignEvaluator.h | 41 +++++---- Eigen/src/Core/Block.h | 10 ++- Eigen/src/Core/CoreEvaluators.h | 87 ++++++++++++-------- Eigen/src/Core/DenseCoeffsBase.h | 6 +- Eigen/src/Core/GenericPacketMath.h | 12 +-- Eigen/src/Core/Map.h | 4 +- Eigen/src/Core/MapBase.h | 3 +- Eigen/src/Core/Matrix.h | 20 ++++- Eigen/src/Core/PlainObjectBase.h | 14 ++-- Eigen/src/Core/ProductEvaluators.h | 31 +++---- Eigen/src/Core/Redux.h | 13 +-- Eigen/src/Core/Ref.h | 7 +- Eigen/src/Core/StableNorm.h | 18 ++-- Eigen/src/Core/Transpose.h | 2 +- Eigen/src/Core/util/Constants.h | 33 ++++++-- Eigen/src/Core/util/XprHelper.h | 37 --------- Eigen/src/Geometry/Quaternion.h | 12 +-- Eigen/src/Geometry/arch/Geometry_SSE.h | 10 +-- Eigen/src/Jacobi/Jacobi.h | 18 ++-- Eigen/src/LU/arch/Inverse_SSE.h | 8 +- Eigen/src/SparseCore/SparseDiagonalProduct.h | 8 +- test/mapped_matrix.cpp | 20 ++--- test/unalignedassert.cpp | 12 +-- test/vectorization_logic.cpp | 5 +- 24 files changed, 231 insertions(+), 200 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index b0468dd64..39efb1d5a 100644 --- 
a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -28,18 +28,19 @@ template struct copy_using_evaluator_traits { typedef typename DstEvaluator::XprType Dst; - + typedef typename Dst::Scalar DstScalar; enum { DstFlags = DstEvaluator::Flags, - SrcFlags = SrcEvaluator::Flags + SrcFlags = SrcEvaluator::Flags, + RequiredAlignment = packet_traits::size*sizeof(DstScalar) // FIXME ask packet_traits for the true alignment requirement }; public: enum { - DstIsAligned = DstFlags & AlignedBit, + DstAlignment = DstEvaluator::Alignment, + SrcAlignment = SrcEvaluator::Alignment, DstHasDirectAccess = DstFlags & DirectAccessBit, - SrcIsAligned = SrcFlags & AlignedBit, - JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned + JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) }; private: @@ -51,7 +52,7 @@ private: : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) : int(Dst::MaxRowsAtCompileTime), MaxSizeAtCompileTime = Dst::SizeAtCompileTime, - PacketSize = packet_traits::size + PacketSize = packet_traits::size }; enum { @@ -62,10 +63,10 @@ private: && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) && (functor_traits::PacketAccess), MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 - && int(DstIsAligned) && int(SrcIsAligned), + && int(JointAlignment)>=int(RequiredAlignment), MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess - && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), + && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. 
*/ MaySliceVectorize = MightVectorize && DstHasDirectAccess @@ -107,8 +108,8 @@ public: : int(NoUnrolling) ) : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) - : int(NoUnrolling) ) + ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling) + : int(NoUnrolling) ) : int(Traversal) == int(LinearTraversal) ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) @@ -124,8 +125,9 @@ public: EIGEN_DEBUG_VAR(DstFlags) EIGEN_DEBUG_VAR(SrcFlags) std::cerr.unsetf(std::ios::hex); - EIGEN_DEBUG_VAR(DstIsAligned) - EIGEN_DEBUG_VAR(SrcIsAligned) + EIGEN_DEBUG_VAR(DstAlignment) + EIGEN_DEBUG_VAR(SrcAlignment) + EIGEN_DEBUG_VAR(RequiredAlignment) EIGEN_DEBUG_VAR(JointAlignment) EIGEN_DEBUG_VAR(InnerSize) EIGEN_DEBUG_VAR(InnerMaxSize) @@ -360,11 +362,13 @@ struct dense_assignment_loop EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { const Index size = kernel.size(); - typedef packet_traits PacketTraits; + typedef typename Kernel::Scalar Scalar; + typedef packet_traits PacketTraits; enum { packetSize = PacketTraits::size, - dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned), - dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned, + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(Kernel::AssignmentTraits::RequiredAlignment), + dstAlignment = PacketTraits::AlignedOnScalar ? int(Kernel::AssignmentTraits::RequiredAlignment) + : int(Kernel::AssignmentTraits::DstAlignment), srcAlignment = Kernel::AssignmentTraits::JointAlignment }; const Index alignedStart = dstIsAligned ? 
0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size); @@ -475,9 +479,10 @@ struct dense_assignment_loop typedef packet_traits PacketTraits; enum { packetSize = PacketTraits::size, - alignable = PacketTraits::AlignedOnScalar, - dstIsAligned = Kernel::AssignmentTraits::DstIsAligned, - dstAlignment = alignable ? Aligned : int(dstIsAligned) + alignable = PacketTraits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar), + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(Kernel::AssignmentTraits::RequiredAlignment), + dstAlignment = alignable ? int(Kernel::AssignmentTraits::RequiredAlignment) + : int(Kernel::AssignmentTraits::DstAlignment) }; const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0); if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0) diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index aed6147c7..3748e259b 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -81,14 +81,16 @@ struct traits > : traits::ret) : int(inner_stride_at_compile_time::ret), - // IsAligned is needed by MapBase's assertions - // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator - IsAligned = 0, + // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, - Flags = (traits::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit + Flags = (traits::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit, // FIXME DirectAccessBit should not be handled by expressions + // + // Alignment is needed by MapBase's assertions + // We can sefely set it to false here. 
Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + Alignment = 0 }; }; diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 6b0e3617d..910d04ecb 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -111,6 +111,10 @@ struct evaluator_base typedef typename traits::StorageIndex StorageIndex; // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits ExpressionTraits; + + enum { + Alignment = 0 + }; }; // -------------------- Matrix and Array -------------------- @@ -137,8 +141,8 @@ struct evaluator > ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, CoeffReadCost = NumTraits::ReadCost, - Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime, - Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret + Flags = traits::EvaluatorFlags, + Alignment = traits::Alignment }; EIGEN_DEVICE_FUNC evaluator() @@ -255,7 +259,8 @@ struct unary_evaluator, IndexBased> enum { CoeffReadCost = evaluator::CoeffReadCost, - Flags = evaluator::Flags ^ RowMajorBit + Flags = evaluator::Flags ^ RowMajorBit, + Alignment = evaluator::Alignment }; EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} @@ -331,7 +336,8 @@ struct evaluator > & ( HereditaryBits | (functor_has_linear_access::ret ? LinearAccessBit : 0) | (functor_traits::PacketAccess ? PacketAccessBit : 0))) - | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore + | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), // FIXME EvalBeforeNestingBit should be needed anymore + Alignment = 0 // FIXME alignment should not matter here, perhaps we could set it to AlignMax?? 
}; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) @@ -378,9 +384,9 @@ struct unary_evaluator, IndexBased > enum { CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, - Flags = evaluator::Flags & ( - HereditaryBits | LinearAccessBit | AlignedBit - | (functor_traits::PacketAccess ? PacketAccessBit : 0)) + Flags = evaluator::Flags + & (HereditaryBits | LinearAccessBit | (functor_traits::PacketAccess ? PacketAccessBit : 0)), + Alignment = evaluator::Alignment }; EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) @@ -447,13 +453,13 @@ struct binary_evaluator, IndexBased, IndexBase Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( HereditaryBits | (int(LhsFlags) & int(RhsFlags) & - ( AlignedBit - | (StorageOrdersAgree ? LinearAccessBit : 0) + ( (StorageOrdersAgree ? LinearAccessBit : 0) | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) ) ) ), - Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit) + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment,evaluator::Alignment) }; EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) @@ -506,7 +512,9 @@ struct unary_evaluator, IndexBased> enum { CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, - Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)) + Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), + + Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... 
}; EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) @@ -641,7 +649,6 @@ struct evaluator > HasNoInnerStride = InnerStrideAtCompileTime == 1, HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, HasNoStride = HasNoInnerStride && HasNoOuterStride, - IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned), IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, // TODO: should check for smaller packet types once we can handle multi-sized packet types @@ -653,10 +660,13 @@ struct evaluator > || ( OuterStrideAtCompileTime!=Dynamic && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ), Flags0 = evaluator::Flags, - Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), - Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) - ? int(Flags1) : int(Flags1 & ~LinearAccessBit), - Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) + //Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), + Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) + ? int(Flags0) : int(Flags0 & ~LinearAccessBit), + Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit), + + //IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&int(AlignedMask))>0), + Alignment = int(MapOptions)&int(AlignedMask) }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) @@ -673,7 +683,8 @@ struct evaluator > typedef Ref XprType; enum { - Flags = evaluator >::Flags + Flags = evaluator >::Flags, + Alignment = evaluator >::Alignment }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) @@ -717,17 +728,17 @@ struct evaluator > && (InnerStrideAtCompileTime == 1) ? 
PacketAccessBit : 0, - // TODO: should check for smaller packet types once we can handle multi-sized packet types - AlignBytes = int(packet_traits::size) * sizeof(Scalar), - - MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, Flags0 = evaluator::Flags & ( (HereditaryBits & ~RowMajorBit) | DirectAccessBit | - MaskPacketAccessBit | - MaskAlignedBit), - Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit + MaskPacketAccessBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, + + // TODO: should check for smaller packet types once we can handle multi-sized packet types + AlignBytes = int(packet_traits::size) * sizeof(Scalar), + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? 
AlignBytes : 0, + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) }; typedef block_evaluator block_evaluator_type; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {} @@ -833,11 +844,8 @@ struct block_evaluator(block) { - // TODO: should check for smaller packet types once we can handle multi-sized packet types - const int AlignBytes = int(packet_traits::size) * sizeof(Scalar); - EIGEN_ONLY_USED_FOR_DEBUG(AlignBytes) // FIXME this should be an internal assertion - eigen_assert(EIGEN_IMPLIES(evaluator::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned"); + eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); } }; @@ -856,7 +864,9 @@ struct evaluator > + EIGEN_SIZE_MAX(evaluator::CoeffReadCost, evaluator::CoeffReadCost), - Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits + Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, + + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment) }; inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) @@ -908,7 +918,9 @@ struct unary_evaluator > enum { CoeffReadCost = evaluator::CoeffReadCost, - Flags = (evaluator::Flags & HereditaryBits & ~RowMajorBit) | (traits::Flags & RowMajorBit) + Flags = (evaluator::Flags & HereditaryBits & ~RowMajorBit) | (traits::Flags & RowMajorBit), + + Alignment = evaluator::Alignment }; EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) @@ -992,7 +1004,9 @@ struct evaluator > CoeffReadCost = TraversalSize==Dynamic ? 
Dynamic : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), - Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&HereditaryBits) + Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&HereditaryBits), + + Alignment = 0 // FIXME this could be improved }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr) @@ -1028,7 +1042,8 @@ struct evaluator_wrapper_base typedef typename remove_all::type ArgType; enum { CoeffReadCost = evaluator::CoeffReadCost, - Flags = evaluator::Flags + Flags = evaluator::Flags, + Alignment = evaluator::Alignment }; EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} @@ -1144,7 +1159,9 @@ struct unary_evaluator > LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) ? LinearAccessBit : 0, - Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess) + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess), + + Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f. }; typedef internal::reverse_packet_cond reverse_packet; @@ -1226,7 +1243,9 @@ struct evaluator > enum { CoeffReadCost = evaluator::CoeffReadCost, - Flags = (unsigned int)evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit + Flags = (unsigned int)evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit, + + Alignment = 0 }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index f08380bed..11e2a1809 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -602,11 +602,11 @@ struct first_aligned_impl * documentation. 
*/ template -static inline Index first_aligned(const Derived& m) +static inline Index first_aligned(const DenseBase& m) { return first_aligned_impl - - ::run(m); + ::Alignment > 0 ) || !(Derived::Flags & DirectAccessBit)> // FIXME Alignment! + ::run(m.derived()); } template::ret> diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index cbb15f79d..4cc5f656f 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -450,22 +450,22 @@ pmadd(const Packet& a, { return padd(pmul(a, b),c); } /** \internal \returns a packet version of \a *from. - * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */ -template + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits::type* from) { - if(LoadMode == Aligned) + if(Alignment >= unpacket_traits::size*sizeof(typename unpacket_traits::type)) return pload(from); else return ploadu(from); } /** \internal copy the packet \a from to \a *to. - * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */ -template + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) { - if(LoadMode == Aligned) + if(Alignment >= unpacket_traits::size*sizeof(typename unpacket_traits::type)) pstore(to, from); else pstoreu(to, from); diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index 2b5971730..3a8375da9 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -19,7 +19,7 @@ namespace Eigen { * \brief A matrix or vector expression mapping an existing array of data. * * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * \tparam MapOptions specifies the pointer alignment in bytes. 
It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. * The default is \c #Unaligned. * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout * of an ordinary, contiguous array. This can be overridden by specifying strides. @@ -77,7 +77,7 @@ struct traits > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 ? int(PlainObjectType::OuterStrideAtCompileTime) : int(StrideType::OuterStrideAtCompileTime), - IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned), + Alignment = int(MapOptions)&int(AlignedMask), Flags0 = TraitsBase::Flags & (~NestByRefBit), Flags = is_lvalue::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) }; diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index b175a3fa0..ae28d4db6 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -160,9 +160,8 @@ template class MapBase EIGEN_DEVICE_FUNC void checkSanity() const { - // TODO "IsAligned" should be replaced to handle arbitrary alignment #if EIGEN_MAX_ALIGN_BYTES>0 - eigen_assert(EIGEN_IMPLIES(internal::traits::IsAligned, (size_t(m_data) % EIGEN_MAX_ALIGN_BYTES) == 0) && "data is not aligned"); + eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits::Alignment)) == 0) && "data is not aligned"); #endif } diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 83c1ef2c8..e67fff6c5 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -139,6 +139,18 @@ namespace internal { template struct traits > { +private: + enum { + row_major_bit = _Options&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, + max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, + default_alignment = compute_default_alignment<_Scalar,max_size>::value, + actual_alignment = ((_Options&DontAlign)==0) ? 
default_alignment : 0, + required_alignment = packet_traits<_Scalar>::size * sizeof(_Scalar), // FIXME ask packet_traits for the true required alignment + packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0 + }; + +public: typedef _Scalar Scalar; typedef Dense StorageKind; typedef Eigen::Index StorageIndex; @@ -149,11 +161,13 @@ struct traits > MaxRowsAtCompileTime = _MaxRows, MaxColsAtCompileTime = _MaxCols, Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, - // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase - EvaluatorFlags = compute_matrix_evaluator_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, Options = _Options, InnerStrideAtCompileTime = 1, - OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime + OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime, + + // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, + Alignment = actual_alignment }; }; } diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 9cb32e7d8..e83b92476 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -116,20 +116,20 @@ class PlainObjectBase : public internal::dense_xpr_base::type typedef Eigen::Map MapType; friend class Eigen::Map; typedef const Eigen::Map ConstMapType; - friend class Eigen::Map; - typedef Eigen::Map AlignedMapType; - friend class Eigen::Map; - typedef const Eigen::Map ConstAlignedMapType; + friend class Eigen::Map; + typedef Eigen::Map AlignedMapType; + friend class Eigen::Map; + typedef const Eigen::Map ConstAlignedMapType; template struct StridedMapType { typedef Eigen::Map type; }; template struct StridedConstMapType { typedef Eigen::Map type; }; - template 
struct StridedAlignedMapType { typedef Eigen::Map type; }; - template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; + template struct StridedAlignedMapType { typedef Eigen::Map type; }; + template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; protected: DenseStorage m_storage; public: - enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits::EvaluatorFlags & AlignedBit) != 0 }; + enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits::Alignment>0) }; EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) EIGEN_DEVICE_FUNC diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 43ba86193..1f5d7addd 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -430,24 +430,22 @@ struct product_evaluator, ProductTag, DenseShape, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, + LhsAlignment = LhsEtorType::Alignment, + RhsAlignment = RhsEtorType::Alignment, + + LhsIsAligned = int(LhsAlignment) >= int(sizeof(Scalar)*PacketSize), // FIXME compare to required alignment + RhsIsAligned = int(RhsAlignment) >= int(sizeof(Scalar)*PacketSize), + LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, SameType = is_same::value, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic - || ( (ColsAtCompileTime % packet_traits::size) == 0 - && (RhsFlags&AlignedBit) - ) - ), + && (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic - || ( (RowsAtCompileTime % packet_traits::size) == 0 - && (LhsFlags&AlignedBit) - ) - ), + && (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 
0 @@ -455,11 +453,13 @@ struct product_evaluator, ProductTag, DenseShape, Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) - | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0) - | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0) // TODO enable vectorization for mixed types | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), + Alignment = CanVectorizeLhs ? LhsAlignment + : CanVectorizeRhs ? RhsAlignment + : 0, + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect @@ -469,7 +469,7 @@ struct product_evaluator, ProductTag, DenseShape, && LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) - && (LhsFlags & RhsFlags & AlignedBit) + && (LhsIsAligned && RhsIsAligned) && (InnerSize % packet_traits::size == 0) }; @@ -706,7 +706,8 @@ public: //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, - Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), + Alignment = evaluator::Alignment }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) @@ -732,7 +733,7 @@ protected: { enum { InnerSize = (MatrixType::Flags & RowMajorBit) ? 
MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) + DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! }; return internal::pmul(m_matImpl.template packet(row, col), m_diagImpl.template packet(id)); diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index fea4e2895..0c25223aa 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -165,7 +165,7 @@ struct redux_vec_unroller index = Start * packet_traits::size, outer = index / int(Derived::InnerSizeAtCompileTime), inner = index % int(Derived::InnerSizeAtCompileTime), - alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned + alignment = Derived::Alignment }; typedef typename Derived::Scalar Scalar; @@ -222,10 +222,10 @@ struct redux_impl const Index size = mat.size(); const Index packetSize = packet_traits::size; - const Index alignedStart = internal::first_aligned(mat); + const Index alignedStart = internal::first_aligned(mat.nestedExpression()); enum { - alignment = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) || bool(Derived::Flags & AlignedBit) - ? Aligned : Unaligned + alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) ? 
int(sizeof(Scalar)*packetSize) : int(Unaligned), // FIXME take into account alignment requirement + alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment) }; const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); @@ -352,7 +352,8 @@ public: IsRowMajor = XprType::IsRowMajor, SizeAtCompileTime = XprType::SizeAtCompileTime, InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, - CoeffReadCost = evaluator::CoeffReadCost + CoeffReadCost = evaluator::CoeffReadCost, + Alignment = evaluator::Alignment }; EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } @@ -385,6 +386,8 @@ public: PacketReturnType packetByOuterInner(Index outer, Index inner) const { return m_evaluator.template packet(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + const XprType & nestedExpression() const { return m_xpr; } + protected: typename internal::evaluator::nestedType m_evaluator; const XprType &m_xpr; diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index ea5a2bd5c..61de5ed17 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -18,7 +18,7 @@ namespace Eigen { * \brief A matrix or vector expression mapping an existing expression * * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. * The default is \c #Unaligned. * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), * but accepts a variable outer stride (leading dimension). 
@@ -92,7 +92,8 @@ struct traits > typedef _StrideType StrideType; enum { Options = _Options, - Flags = traits >::Flags | NestByRefBit + Flags = traits >::Flags | NestByRefBit, + Alignment = traits >::Alignment }; template struct match { @@ -104,7 +105,7 @@ struct traits > || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), OuterStrideMatch = Derived::IsVectorAtCompileTime || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), - AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits::Flags&AlignedBit)==AlignedBit), + AlignmentMatch = (int(traits::Alignment)==int(Unaligned)) || (int(evaluator::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment ScalarTypeMatch = internal::is_same::value, MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch }; diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index f9cd01b7e..aca81f463 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -162,21 +162,27 @@ MatrixBase::stableNorm() const RealScalar scale(0); RealScalar invScale(1); RealScalar ssq(0); // sum of square + + typedef typename internal::nested_eval::type DerivedCopy; + typedef typename internal::remove_all::type DerivedCopyClean; + DerivedCopy copy(derived()); + enum { - Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 
1 : 0 + CanAlign = (int(Flags)&DirectAccessBit) || (int(internal::evaluator::Alignment)>0) // FIXME }; - typedef typename internal::conditional, Aligned>, - typename Base::ConstSegmentReturnType>::type SegmentWrapper; + typedef typename internal::conditional, internal::evaluator::Alignment>, + typename DerivedCopyClean + ::ConstSegmentReturnType>::type SegmentWrapper; Index n = size(); if(n==1) return abs(this->coeff(0)); - Index bi = internal::first_aligned(derived()); + Index bi = internal::first_aligned(copy); if (bi>0) - internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale); + internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale); for (; bisegment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale); + internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale); return scale * sqrt(ssq); } diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index e205cec4a..2152405d5 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -233,7 +233,7 @@ struct inplace_transpose_selector { // PacketSize x Packet typedef typename MatrixType::Scalar Scalar; typedef typename internal::packet_traits::type Packet; const Index PacketSize = internal::packet_traits::size; - const Index Alignment = internal::evaluator::Flags&AlignedBit ? Aligned : Unaligned; + const Index Alignment = internal::evaluator::Alignment; PacketBlock A; for (Index i=0; i(i,0); diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 93c0786fa..3e811a173 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // Copyright (C) 2007-2009 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -140,7 +140,7 @@ const unsigned int LvalueBit = 0x20; */ const unsigned int DirectAccessBit = 0x40; -/** \ingroup flags +/* \ingroup flags * * means the first coefficient packet is guaranteed to be aligned. * An expression cannot has the AlignedBit without the PacketAccessBit flag. @@ -215,12 +215,31 @@ enum { }; /** \ingroup enums - * Enum for indicating whether an object is aligned or not. */ + * Enum for indicating whether a buffer is aligned or not. */ enum { - /** Object is not correctly aligned for vectorization. */ - Unaligned=0, - /** Object is aligned for vectorization. */ - Aligned=1 + Unaligned=0, /**< Data pointer has no specific alignment. */ + Aligned8=8, /**< Data pointer is aligned on an 8-byte boundary. */ + Aligned16=16, /**< Data pointer is aligned on a 16-byte boundary. */ + Aligned32=32, /**< Data pointer is aligned on a 32-byte boundary. */ + Aligned64=64, /**< Data pointer is aligned on a 64-byte boundary. */ + Aligned128=128, /**< Data pointer is aligned on a 128-byte boundary. */ + AlignedMask=255, + Aligned=16, /**< \deprecated Synonym for Aligned16. 
*/ +#if EIGEN_MAX_ALIGN_BYTES==128 + AlignedMax = Aligned128 +#elif EIGEN_MAX_ALIGN_BYTES==64 + AlignedMax = Aligned64 +#elif EIGEN_MAX_ALIGN_BYTES==32 + AlignedMax = Aligned32 +#elif EIGEN_MAX_ALIGN_BYTES==16 + AlignedMax = Aligned16 +#elif EIGEN_MAX_ALIGN_BYTES==8 + AlignedMax = Aligned8 +#elif EIGEN_MAX_ALIGN_BYTES==0 + AlignedMax = Unaligned +#else +#error Invalid value for EIGEN_MAX_ALIGN_BYTES +#endif }; /** \ingroup enums diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 6dc1f6e3f..81e992392 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -192,43 +192,6 @@ class compute_matrix_flags enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit }; }; -template -class compute_matrix_evaluator_flags -{ - enum { - row_major_bit = Options&RowMajor ? RowMajorBit : 0, - is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, - - // TODO: should check for smaller packet types once we can handle multi-sized packet types - align_bytes = int(packet_traits::size) * sizeof(Scalar), - - aligned_bit = - ( - ((Options&DontAlign)==0) - && ( -#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 - ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0)) -#else - 0 -#endif - - || - -#if EIGEN_MAX_ALIGN_BYTES!=0 - is_dynamic_size_storage -#else - 0 -#endif - - ) - ) ? AlignedBit : 0, - packet_access_bit = packet_traits::Vectorizable && aligned_bit ? PacketAccessBit : 0 - }; - - public: - enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit }; -}; - template struct size_at_compile_time { enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? 
Dynamic : _Rows * _Cols }; diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 15a063994..7a30dc0b6 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -217,8 +217,8 @@ struct traits > typedef _Scalar Scalar; typedef Matrix<_Scalar,4,1,_Options> Coefficients; enum{ - IsAligned = (internal::traits::EvaluatorFlags & AlignedBit) != 0, - Flags = IsAligned ? (AlignedBit | LvalueBit) : LvalueBit + Alignment = internal::traits::Alignment, + Flags = LvalueBit }; }; } @@ -228,7 +228,7 @@ class Quaternion : public QuaternionBase > { public: typedef QuaternionBase > Base; - enum { IsAligned = internal::traits::IsAligned }; + enum { NeedsAlignment = internal::traits::Alignment>0 }; typedef _Scalar Scalar; @@ -277,7 +277,7 @@ public: inline Coefficients& coeffs() { return m_coeffs;} inline const Coefficients& coeffs() const { return m_coeffs;} - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(IsAligned) + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsAlignment) protected: Coefficients m_coeffs; @@ -441,7 +441,7 @@ QuaternionBase::operator* (const QuaternionBase& other) c YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) return internal::quat_product::Scalar, - (internal::traits::IsAligned && internal::traits::IsAligned)?Aligned:Unaligned>::run(*this, other); + EIGEN_PLAIN_ENUM_MIN(internal::traits::Alignment, internal::traits::Alignment)>::run(*this, other); } /** \sa operator*(Quaternion) */ @@ -668,7 +668,7 @@ QuaternionBase::conjugate() const { return internal::quat_conj::Scalar, - internal::traits::IsAligned?Aligned:Unaligned>::run(*this); + internal::traits::Alignment>::run(*this); } diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index e59c32c56..1a86ff837 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -16,14 +16,14 @@ namespace Eigen { namespace internal { 
template -struct quat_product +struct quat_product { static inline Quaternion run(const QuaternionBase& _a, const QuaternionBase& _b) { Quaternion res; const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); - __m128 a = _a.coeffs().template packet(0); - __m128 b = _b.coeffs().template packet(0); + __m128 a = _a.coeffs().template packet(0); + __m128 b = _b.coeffs().template packet(0); __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); pstore(&res.x(), @@ -55,8 +55,8 @@ struct cross3_impl static inline typename plain_matrix_type::type run(const VectorLhs& lhs, const VectorRhs& rhs) { - __m128 a = lhs.template packet(0); - __m128 b = rhs.template packet(0); + __m128 a = lhs.template packet::Alignment>(0); + __m128 b = rhs.template packet::Alignment>(0); __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); typename plain_matrix_type::type res; diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h index 25eabe984..b7b83dcd2 100644 --- a/Eigen/src/Jacobi/Jacobi.h +++ b/Eigen/src/Jacobi/Jacobi.h @@ -263,7 +263,7 @@ namespace internal { * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight() */ template -void apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, const JacobiRotation& j); +void apply_rotation_in_the_plane(DenseBase& xpr_x, DenseBase& xpr_y, const JacobiRotation& j); } /** \jacobi_module @@ -298,18 +298,18 @@ inline void MatrixBase::applyOnTheRight(Index p, Index q, const JacobiR namespace internal { template -void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, const JacobiRotation& j) +void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x, DenseBase& xpr_y, const JacobiRotation& j) { typedef typename VectorX::Scalar Scalar; enum { PacketSize = packet_traits::size }; typedef typename 
packet_traits::type Packet; - eigen_assert(_x.size() == _y.size()); - Index size = _x.size(); - Index incrx = _x.innerStride(); - Index incry = _y.innerStride(); + eigen_assert(xpr_x.size() == xpr_y.size()); + Index size = xpr_x.size(); + Index incrx = xpr_x.derived().innerStride(); + Index incry = xpr_y.derived().innerStride(); - Scalar* EIGEN_RESTRICT x = &_x.coeffRef(0); - Scalar* EIGEN_RESTRICT y = &_y.coeffRef(0); + Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0); + Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0); OtherScalar c = j.c(); OtherScalar s = j.s(); @@ -392,7 +392,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(VectorX& _x, VectorY& _y, /*** fixed-size vectorized path ***/ else if(VectorX::SizeAtCompileTime != Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && - (VectorX::Flags & VectorY::Flags & AlignedBit)) + (EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment)>0)) // FIXME should be compared to the required alignment { const Packet pc = pset1(c); const Packet ps = pset1(s); diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h index 1f62ef14e..e1470c664 100644 --- a/Eigen/src/LU/arch/Inverse_SSE.h +++ b/Eigen/src/LU/arch/Inverse_SSE.h @@ -35,8 +35,8 @@ template struct compute_inverse_size4 { enum { - MatrixAlignment = bool(MatrixType::Flags&AlignedBit), - ResultAlignment = bool(ResultType::Flags&AlignedBit), + MatrixAlignment = traits::Alignment, + ResultAlignment = traits::Alignment, StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; @@ -165,8 +165,8 @@ template struct compute_inverse_size4 { enum { - MatrixAlignment = bool(MatrixType::Flags&AlignedBit), - ResultAlignment = bool(ResultType::Flags&AlignedBit), + MatrixAlignment = traits::Alignment, + ResultAlignment = traits::Alignment, 
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h index 29a67da35..d82927216 100644 --- a/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -41,7 +41,7 @@ struct product_evaluator, ProductTag, Diagonal typedef Product XprType; typedef evaluator type; typedef evaluator nestedType; - enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags + enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags typedef sparse_diagonal_product_evaluator Base; explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {} @@ -54,14 +54,14 @@ struct product_evaluator, ProductTag, SparseSh typedef Product XprType; typedef evaluator type; typedef evaluator nestedType; - enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags + enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags typedef sparse_diagonal_product_evaluator, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> Base; explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal().transpose()) {} }; template -struct sparse_diagonal_product_evaluator +struct sparse_diagonal_product_evaluator { protected: typedef typename evaluator::InnerIterator SparseXprInnerIterator; @@ -92,7 +92,7 @@ protected: template -struct sparse_diagonal_product_evaluator +struct sparse_diagonal_product_evaluator { typedef typename SparseXprType::Scalar Scalar; diff --git a/test/mapped_matrix.cpp b/test/mapped_matrix.cpp index f080ca7ed..7c7099792 100644 --- 
a/test/mapped_matrix.cpp +++ b/test/mapped_matrix.cpp @@ -25,15 +25,15 @@ template void map_class_vector(const VectorType& m) Scalar* array1 = internal::aligned_new(size); Scalar* array2 = internal::aligned_new(size); Scalar* array3 = new Scalar[size+1]; - Scalar* array3unaligned = size_t(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? array3+1 : array3; + Scalar* array3unaligned = (std::size_t(array3)%EIGEN_MAX_ALIGN_BYTES) == 0 ? array3+1 : array3; Scalar array4[EIGEN_TESTMAP_MAX_SIZE]; - Map(array1, size) = VectorType::Random(size); - Map(array2, size) = Map(array1, size); + Map(array1, size) = VectorType::Random(size); + Map(array2, size) = Map(array1, size); Map(array3unaligned, size) = Map(array1, size); - Map(array4, size) = Map(array1, size); - VectorType ma1 = Map(array1, size); - VectorType ma2 = Map(array2, size); + Map(array4, size) = Map(array1, size); + VectorType ma1 = Map(array1, size); + VectorType ma2 = Map(array2, size); VectorType ma3 = Map(array3unaligned, size); VectorType ma4 = Map(array4, size); VERIFY_IS_EQUAL(ma1, ma2); @@ -41,7 +41,7 @@ template void map_class_vector(const VectorType& m) VERIFY_IS_EQUAL(ma1, ma4); #ifdef EIGEN_VECTORIZE if(internal::packet_traits::Vectorizable) - VERIFY_RAISES_ASSERT((Map(array3unaligned, size))) + VERIFY_RAISES_ASSERT((Map(array3unaligned, size))) #endif internal::aligned_delete(array1, size); @@ -71,7 +71,7 @@ template void map_class_matrix(const MatrixType& m) for(int i = 0; i < size; i++) array4[i] = Scalar(1); Map map1(array1, rows, cols); - Map map2(array2, rows, cols); + Map map2(array2, rows, cols); Map map3(array3unaligned, rows, cols); Map map4(array4, rows, cols); @@ -154,9 +154,9 @@ template void check_const_correctness(const PlainObjec // verify that map-to-const don't have LvalueBit typedef typename internal::add_const::type ConstPlainObjectType; VERIFY( !(internal::traits >::Flags & LvalueBit) ); - VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(internal::traits >::Flags & LvalueBit) 
); VERIFY( !(Map::Flags & LvalueBit) ); - VERIFY( !(Map::Flags & LvalueBit) ); + VERIFY( !(Map::Flags & LvalueBit) ); } template diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp index f53f167b4..014cc834b 100644 --- a/test/unalignedassert.cpp +++ b/test/unalignedassert.cpp @@ -162,12 +162,12 @@ void unalignedassert() } for(int b=8; b(b)); - VERIFY_RAISES_ASSERT(construct_at_boundary(b)); - VERIFY_RAISES_ASSERT(construct_at_boundary(b)); - VERIFY_RAISES_ASSERT(construct_at_boundary(b)); - VERIFY_RAISES_ASSERT(construct_at_boundary(b)); - VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<64) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<128) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); } #endif } diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 97477072a..18dad6b5d 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -35,7 +35,6 @@ std::string demangle_flags(int f) if(f&LinearAccessBit) res += " | Linear"; if(f&LvalueBit) res += " | Lvalue"; if(f&DirectAccessBit) res += " | Direct"; - if(f&AlignedBit) res += " | Aligned"; if(f&NestByRefBit) res += " | NestByRef"; if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit"; @@ -204,12 +203,12 @@ template::Vectori LinearVectorizedTraversal,CompleteUnrolling)); VERIFY((test_assign< - Map >, + Map >, Matrix22 >(InnerVectorizedTraversal,CompleteUnrolling))); VERIFY((test_assign< - Map, Aligned, InnerStride<3*PacketSize> >, + Map, AlignedMax, InnerStride<3*PacketSize> >, Matrix >(DefaultTraversal,CompleteUnrolling)));