diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index cebfeaf75..5fffdcb01 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -33,10 +33,10 @@ * \param MatrixType the type of the object in which we are taking a block * \param BlockRows the number of rows of the block we are taking at compile time (optional) * \param BlockCols the number of columns of the block we are taking at compile time (optional) - * \param _PacketAccess allows to enforce aligned loads and stores if set to \b ForceAligned. - * The default is \b AsRequested. This parameter is internaly used by Eigen - * in expressions such as \code mat.block() += other; \endcode and most of - * the time this is the only way it is used. + * \param _PacketAccess \internal used to enforce aligned loads in expressions such as + * \code mat.block() += other; \endcode. Possible values are + * \c AsRequested (default) and \c EnforceAlignedAccess. + * See class MapBase for more details. * \param _DirectAccessStatus \internal used for partial specialization * * This class represents an expression of either a fixed-size or dynamic-size block. 
It is the return @@ -84,9 +84,9 @@ struct ei_traits::CoeffReadCost, PacketAccess = _PacketAccess }; - typedef typename ei_meta_if&, - Block >::ret AlignedDerivedType; + Block >::ret AlignedDerivedType; }; template class Block @@ -228,13 +228,13 @@ class Block class InnerIterator; typedef typename ei_traits::AlignedDerivedType AlignedDerivedType; - friend class Block; + friend class Block; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) - AlignedDerivedType _convertToForceAligned() + AlignedDerivedType _convertToEnforceAlignedAccess() { - return Block + return Block (m_matrix, Base::m_data, Base::m_rows.value(), Base::m_cols.value()); } diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index f6bc814e2..dba7e20e4 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -31,16 +31,14 @@ * \brief A matrix or vector expression mapping an existing array of data. * * \param MatrixType the equivalent matrix type of the mapped data - * \param _PacketAccess allows to enforce aligned loads and stores if set to ForceAligned. - * The default is AsRequested. This parameter is internaly used by Eigen - * in expressions such as \code Map<...>(...) += other; \endcode and most - * of the time this is the only way it is used. + * \param PointerAlignment specifies whether the pointer is \c Aligned, or \c Unaligned. + * The default is \c Unaligned. * * This class represents a matrix or vector expression mapping an existing array of data. * It can be used to let Eigen interface without any overhead with non-Eigen data structures, * such as plain C arrays or structures from other libraries. * - * \b Tips: to change the array of data mapped by a Map object, you can use the C++ + * \b Tip: to change the array of data mapped by a Map object, you can use the C++ * placement new syntax: * * Example: \include Map_placement_new.cpp @@ -48,22 +46,27 @@ * * This class is the return type of Matrix::Map() but can also be used directly. 
+ * + * \b Note \b to \b Eigen \b developers: The template parameter \c PointerAlignment + * can also be or-ed with \c EnforceAlignedAccess in order to enforce aligned reads + * in expressions such as \code A += B; \endcode. See class MapBase for further details. + * * \sa Matrix::Map() */ -template -struct ei_traits > : public ei_traits +template +struct ei_traits > : public ei_traits { enum { - PacketAccess = _PacketAccess, - Flags = ei_traits::Flags & ~AlignedBit + PacketAccess = Options & EnforceAlignedAccess, + Flags = (Options&Aligned)==Aligned ? ei_traits::Flags | AlignedBit + : ei_traits::Flags & ~AlignedBit }; - typedef typename ei_meta_if&, - Map >::ret AlignedDerivedType; + typedef typename ei_meta_if&, + Map >::ret AlignedDerivedType; }; -template class Map - : public MapBase > +template class Map + : public MapBase > { public: @@ -72,9 +75,9 @@ template class Map inline int stride() const { return this->innerSize(); } - AlignedDerivedType _convertToForceAligned() + AlignedDerivedType _convertToEnforceAlignedAccess() { - return Map(Base::m_data, Base::m_rows.value(), Base::m_cols.value()); + return AlignedDerivedType(Base::m_data, Base::m_rows.value(), Base::m_cols.value()); } inline Map(const Scalar* data) : Base(data) {} diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 88a3fac1e..8770732de 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -32,11 +32,17 @@ * * Expression classes inheriting MapBase must define the constant \c PacketAccess, * and type \c AlignedDerivedType in their respective ei_traits<> specialization structure. - * The value of \c PacketAccess can be either: - * - \b ForceAligned which enforces both aligned loads and stores - * - \b AsRequested which is the default behavior + * The value of \c PacketAccess can be either \b AsRequested, or set to \b EnforceAlignedAccess which + * enforces both aligned loads and stores. 
+ * + * \c EnforceAlignedAccess is automatically set in expressions such as + * \code A += B; \endcode where A is either a Block or a Map. Here, + * this expression is transformed into \code A = A_with_EnforceAlignedAccess + B; \endcode + * avoiding unaligned loads from A. Indeed, since Eigen's packet evaluation mechanism + * automatically aligns to the destination matrix, we know that loads from A will be aligned too. + * * The type \c AlignedDerivedType should correspond to the equivalent expression type - * with \c PacketAccess being \c ForceAligned. + * with \c PacketAccess set to \c EnforceAlignedAccess. * * \sa class Map, class Block */ @@ -79,19 +85,19 @@ template class MapBase * \sa MapBase::stride() */ inline const Scalar* data() const { return m_data; } - template struct force_aligned_impl { + template struct force_aligned_impl { static AlignedDerivedType run(MapBase& a) { return a.derived(); } }; template struct force_aligned_impl { - static AlignedDerivedType run(MapBase& a) { return a.derived()._convertToForceAligned(); } + static AlignedDerivedType run(MapBase& a) { return a.derived()._convertToEnforceAlignedAccess(); } }; /** \returns an expression equivalent to \c *this but having the \c PacketAccess constant - * set to \c ForceAligned. Must be reimplemented by the derived class. */ + * set to \c EnforceAlignedAccess. Must be reimplemented by the derived class. */ AlignedDerivedType forceAligned() { - return force_aligned_impl::run(*this); + return force_aligned_impl::run(*this); } inline const Scalar& coeff(int row, int col) const @@ -131,7 +137,7 @@ template class MapBase template inline PacketScalar packet(int row, int col) const { - return ei_ploadt + return ei_ploadt (m_data + (IsRowMajor ? 
col + row * stride() : row + col * stride())); } @@ -139,13 +145,13 @@ template class MapBase template inline PacketScalar packet(int index) const { - return ei_ploadt(m_data + index); + return ei_ploadt(m_data + index); } template inline void writePacket(int row, int col, const PacketScalar& x) { - ei_pstoret + ei_pstoret (const_cast(m_data) + (IsRowMajor ? col + row * stride() : row + col * stride()), x); } @@ -153,13 +159,14 @@ template class MapBase template inline void writePacket(int index, const PacketScalar& x) { - ei_pstoret + ei_pstoret (const_cast(m_data) + index, x); } inline MapBase(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + checkDataAlignment(); } inline MapBase(const Scalar* data, int size) @@ -170,6 +177,7 @@ template class MapBase EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) ei_assert(size >= 0); ei_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size); + checkDataAlignment(); } inline MapBase(const Scalar* data, int rows, int cols) @@ -178,6 +186,7 @@ template class MapBase ei_assert( (data == 0) || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); + checkDataAlignment(); } Derived& operator=(const MapBase& other) @@ -215,6 +224,13 @@ template class MapBase { return derived() = forceAligned() / other; } protected: + + void checkDataAlignment() const + { + ei_assert( ((!(ei_traits::Flags&AlignedBit)) + || ((std::size_t(m_data)&0xf)==0)) && "data is not aligned"); + } + const Scalar* EIGEN_RESTRICT m_data; const ei_int_if_dynamic m_rows; const ei_int_if_dynamic m_cols; diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index 06e69c448..f2d1e7240 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -59,7 +59,7 @@ MatrixBase::stableNorm() const RealScalar invScale = 1; RealScalar ssq = 0; // sum 
of square enum { - Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? ForceAligned : AsRequested + Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? EnforceAlignedAccess : AsRequested }; int n = size(); int bi=0; diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index affc1d478..169fb5aec 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -196,8 +196,8 @@ const unsigned int UnitLowerTriangular = LowerTriangularBit | UnitDiagBit; enum { DiagonalOnTheLeft, DiagonalOnTheRight }; -enum { Aligned, Unaligned }; -enum { ForceAligned, AsRequested }; +enum { Unaligned=0, Aligned=1 }; +enum { AsRequested=0, EnforceAlignedAccess=2 }; enum { ConditionalJumpCost = 5 }; enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight }; enum DirectionType { Vertical, Horizontal, BothDirections }; diff --git a/test/map.cpp b/test/map.cpp index 62e727304..fbff647f6 100644 --- a/test/map.cpp +++ b/test/map.cpp @@ -37,14 +37,15 @@ template void map_class(const VectorType& m) Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3; Map(array1, size) = VectorType::Random(size); - Map(array2, size) = Map(array1, size); + Map(array2, size) = Map(array1, size); Map(array3unaligned, size) = Map(array1, size); - VectorType ma1 = Map(array1, size); + VectorType ma1 = Map(array1, size); VectorType ma2 = Map(array2, size); VectorType ma3 = Map(array3unaligned, size); VERIFY_IS_APPROX(ma1, ma2); VERIFY_IS_APPROX(ma1, ma3); - + VERIFY_RAISES_ASSERT((Map(array3unaligned, size))); + ei_aligned_delete(array1, size); ei_aligned_delete(array2, size); delete[] array3;