From 4af1753b6fa83e2c0013f38e6d9e8538896d9d3c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 20 Nov 2009 16:30:14 +0100 Subject: [PATCH] * remove EnforceAlignedAccess option to Block, VectorBlock, Map and MapBase because thanks to the previous commit this is not needed anymore * add a more general ForceAlignedAccess expression which can be used for any expression. It is already used by StableNorm.h. --- Eigen/Core | 1 + Eigen/src/Core/Block.h | 34 ++--- Eigen/src/Core/ForceAlignedAccess.h | 151 ++++++++++++++++++++++ Eigen/src/Core/Map.h | 18 +-- Eigen/src/Core/MapBase.h | 39 +----- Eigen/src/Core/MatrixBase.h | 4 + Eigen/src/Core/StableNorm.h | 4 +- Eigen/src/Core/VectorBlock.h | 19 +-- Eigen/src/Core/util/Constants.h | 1 - Eigen/src/Core/util/ForwardDeclarations.h | 7 +- 10 files changed, 182 insertions(+), 96 deletions(-) create mode 100644 Eigen/src/Core/ForceAlignedAccess.h diff --git a/Eigen/Core b/Eigen/Core index 085022310..dd5f54924 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -164,6 +164,7 @@ struct Dense {}; #include "src/Core/util/BlasUtil.h" #include "src/Core/MatrixStorage.h" #include "src/Core/NestByValue.h" +#include "src/Core/ForceAlignedAccess.h" #include "src/Core/ReturnByValue.h" #include "src/Core/NoAlias.h" #include "src/Core/Matrix.h" diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 9b0ada19b..ef2d791c9 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -33,10 +33,6 @@ * \param MatrixType the type of the object in which we are taking a block * \param BlockRows the number of rows of the block we are taking at compile time (optional) * \param BlockCols the number of columns of the block we are taking at compile time (optional) - * \param _PacketAccess \internal used to enforce aligned loads in expressions such as - * \code mat.block() += other; \endcode. Possible values are - * \c AsRequested (default) and \c EnforceAlignedAccess. - * See class MapBase for more details. 
* \param _DirectAccessStatus \internal used for partial specialization * * This class represents an expression of either a fixed-size or dynamic-size block. It is the return @@ -61,8 +57,8 @@ * * \sa MatrixBase::block(int,int,int,int), MatrixBase::block(int,int), class VectorBlock */ -template -struct ei_traits > +template +struct ei_traits > { typedef typename ei_traits::Scalar Scalar; typedef typename ei_nested::type MatrixTypeNested; @@ -82,16 +78,12 @@ struct ei_traits::Flags & (HereditaryBits | MaskPacketAccessBit | DirectAccessBit)) | FlagsLinearAccessBit, - CoeffReadCost = ei_traits::CoeffReadCost, - PacketAccess = _PacketAccess + CoeffReadCost = ei_traits::CoeffReadCost }; - typedef typename ei_meta_if&, - Block >::ret AlignedDerivedType; }; -template class Block - : public MatrixBase > +template class Block + : public MatrixBase > { public: @@ -219,26 +211,16 @@ template -class Block - : public MapBase > +template +class Block + : public MapBase > { public: _EIGEN_GENERIC_PUBLIC_INTERFACE(Block, MapBase) - class InnerIterator; - typedef typename ei_traits::AlignedDerivedType AlignedDerivedType; - friend class Block; - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) - AlignedDerivedType _convertToEnforceAlignedAccess() - { - return Block - (m_matrix, Base::m_data, Base::m_rows.value(), Base::m_cols.value()); - } - /** Column or Row constructor */ inline Block(const MatrixType& matrix, int i) diff --git a/Eigen/src/Core/ForceAlignedAccess.h b/Eigen/src/Core/ForceAlignedAccess.h new file mode 100644 index 000000000..b3fbb3c5c --- /dev/null +++ b/Eigen/src/Core/ForceAlignedAccess.h @@ -0,0 +1,151 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_FORCEALIGNEDACCESS_H +#define EIGEN_FORCEALIGNEDACCESS_H + +/** \class ForceAlignedAccess + * + * \brief Enforce aligned packet loads and stores regardless of what is requested + * + * \param ExpressionType the type of the object of which we are forcing aligned packet access + * + * This class is the return type of MatrixBase::forceAlignedAccess() + * and most of the time this is the only way it is used. 
+ * + * \sa MatrixBase::forceAlignedAccess() + */ +template +struct ei_traits > : public ei_traits +{}; + +template class ForceAlignedAccess + : public MatrixBase > +{ + public: + + EIGEN_GENERIC_PUBLIC_INTERFACE(ForceAlignedAccess) + + inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + + inline int rows() const { return m_expression.rows(); } + inline int cols() const { return m_expression.cols(); } + inline int stride() const { return m_expression.stride(); } + + inline const CoeffReturnType coeff(int row, int col) const + { + return m_expression.coeff(row, col); + } + + inline Scalar& coeffRef(int row, int col) + { + return m_expression.const_cast_derived().coeffRef(row, col); + } + + inline const CoeffReturnType coeff(int index) const + { + return m_expression.coeff(index); + } + + inline Scalar& coeffRef(int index) + { + return m_expression.const_cast_derived().coeffRef(index); + } + + template + inline const PacketScalar packet(int row, int col) const + { + return m_expression.template packet(row, col); + } + + template + inline void writePacket(int row, int col, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(row, col, x); + } + + template + inline const PacketScalar packet(int index) const + { + return m_expression.template packet(index); + } + + template + inline void writePacket(int index, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(index, x); + } + + operator const ExpressionType&() const { return m_expression; } + + protected: + const ExpressionType& m_expression; + + private: + ForceAlignedAccess& operator=(const ForceAlignedAccess&); +}; + +/** \returns an expression of *this with forced aligned access + * \sa forceAlignedAccessIf(),class ForceAlignedAccess + */ +template +inline const ForceAlignedAccess +MatrixBase::forceAlignedAccess() const +{ + return ForceAlignedAccess(derived()); +} + +/** \returns an expression of *this with forced 
aligned access + * \sa forceAlignedAccessIf(), class ForceAlignedAccess + */ +template +inline ForceAlignedAccess +MatrixBase::forceAlignedAccess() +{ + return ForceAlignedAccess(derived()); +} + +/** \returns an expression of *this with forced aligned access if \a Enable is true. + * \sa forceAlignedAccess(), class ForceAlignedAccess + */ +template +template +inline const typename ei_meta_if,Derived&>::ret +MatrixBase::forceAlignedAccessIf() const +{ + return derived(); +} + +/** \returns an expression of *this with forced aligned access if \a Enable is true. + * \sa forceAlignedAccess(), class ForceAlignedAccess + */ +template +template +inline typename ei_meta_if,Derived&>::ret +MatrixBase::forceAlignedAccessIf() +{ + return derived(); +} + +#endif // EIGEN_FORCEALIGNEDACCESS_H diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index dba7e20e4..de93ce30b 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -31,8 +31,8 @@ * \brief A matrix or vector expression mapping an existing array of data. * * \param MatrixType the equivalent matrix type of the mapped data - * \param PointerAlignment specifies whether the pointer is \c Aligned, or \c Unaligned. - * The default is \c Unaligned. + * \param Options specifies whether the pointer is \c Aligned, or \c Unaligned. + * The default is \c Unaligned. * * This class represents a matrix or vector expression mapping an existing array of data. * It can be used to let Eigen interface without any overhead with non-Eigen data structures, @@ -46,23 +46,15 @@ * * This class is the return type of Matrix::Map() but can also be used directly. * - * \b Note \b to \b Eigen \b developers: The template parameter \c PointerAlignment - * can also be or-ed with \c EnforceAlignedAccess in order to enforce aligned read - * in expressions such as \code A += B; \endcode. See class MapBase for further details. 
- * * \sa Matrix::Map() */ template struct ei_traits > : public ei_traits { enum { - PacketAccess = Options & EnforceAlignedAccess, Flags = (Options&Aligned)==Aligned ? ei_traits::Flags | AlignedBit : ei_traits::Flags & ~AlignedBit }; - typedef typename ei_meta_if&, - Map >::ret AlignedDerivedType; }; template class Map @@ -71,15 +63,9 @@ template class Map public: _EIGEN_GENERIC_PUBLIC_INTERFACE(Map, MapBase) - typedef typename ei_traits::AlignedDerivedType AlignedDerivedType; inline int stride() const { return this->innerSize(); } - AlignedDerivedType _convertToEnforceAlignedAccess() - { - return AlignedDerivedType(Base::m_data, Base::m_rows.value(), Base::m_cols.value()); - } - inline Map(const Scalar* data) : Base(data) {} inline Map(const Scalar* data, int size) : Base(data, size) {} diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 0f0986bc5..31a0dbc6b 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -30,20 +30,6 @@ * * \brief Base class for Map and Block expression with direct access * - * Expression classes inheriting MapBase must define the constant \c PacketAccess, - * and type \c AlignedDerivedType in their respective ei_traits<> specialization structure. - * The value of \c PacketAccess can be either \b AsRequested, or set to \b EnforceAlignedAccess which - * enforces both aligned loads and stores. - * - * \c EnforceAlignedAccess is automatically set in expressions such as - * \code A += B; \endcode where A is either a Block or a Map. Here, - * this expression is transfomed into \code A = A_with_EnforceAlignedAccess + B; \endcode - * avoiding unaligned loads from A. Indeed, since Eigen's packet evaluation mechanism - * automatically align to the destination matrix, we know that loads to A will be aligned too. - * - * The type \c AlignedDerivedType should correspond to the equivalent expression type - * with \c PacketAccess set to \c EnforceAlignedAccess. 
- * * \sa class Map, class Block */ template class MapBase @@ -54,13 +40,11 @@ template class MapBase typedef MatrixBase Base; enum { IsRowMajor = (int(ei_traits::Flags) & RowMajorBit) ? 1 : 0, - PacketAccess = ei_traits::PacketAccess, RowsAtCompileTime = ei_traits::RowsAtCompileTime, ColsAtCompileTime = ei_traits::ColsAtCompileTime, SizeAtCompileTime = Base::SizeAtCompileTime }; - typedef typename ei_traits::AlignedDerivedType AlignedDerivedType; typedef typename ei_traits::Scalar Scalar; typedef typename Base::PacketScalar PacketScalar; using Base::derived; @@ -85,21 +69,6 @@ template class MapBase * \sa MapBase::stride() */ inline const Scalar* data() const { return m_data; } - template struct force_aligned_impl { - static AlignedDerivedType run(MapBase& a) { return a.derived(); } - }; - - template struct force_aligned_impl { - static AlignedDerivedType run(MapBase& a) { return a.derived()._convertToEnforceAlignedAccess(); } - }; - - /** \returns an expression equivalent to \c *this but having the \c PacketAccess constant - * set to \c EnforceAlignedAccess. Must be reimplemented by the derived class. */ - AlignedDerivedType forceAligned() - { - return force_aligned_impl::run(*this); - } - inline const Scalar& coeff(int row, int col) const { if(IsRowMajor) @@ -137,7 +106,7 @@ template class MapBase template inline PacketScalar packet(int row, int col) const { - return ei_ploadt + return ei_ploadt (m_data + (IsRowMajor ? col + row * stride() : row + col * stride())); } @@ -145,13 +114,13 @@ template class MapBase template inline PacketScalar packet(int index) const { - return ei_ploadt(m_data + index); + return ei_ploadt(m_data + index); } template inline void writePacket(int row, int col, const PacketScalar& x) { - ei_pstoret + ei_pstoret (const_cast(m_data) + (IsRowMajor ? 
col + row * stride() : row + col * stride()), x); } @@ -159,7 +128,7 @@ template class MapBase template inline void writePacket(int index, const PacketScalar& x) { - ei_pstoret + ei_pstoret (const_cast(m_data) + index, x); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index d653b25c5..8bfda6595 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -548,6 +548,10 @@ template class MatrixBase inline int stride(void) const { return derived().stride(); } inline const NestByValue nestByValue() const; + inline const ForceAlignedAccess forceAlignedAccess() const; + inline ForceAlignedAccess forceAlignedAccess(); + template inline const typename ei_meta_if,Derived&>::ret forceAlignedAccessIf() const; + template inline typename ei_meta_if,Derived&>::ret forceAlignedAccessIf(); Scalar sum() const; Scalar mean() const; diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index b08f4a1ae..c2dd6b847 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -59,7 +59,7 @@ MatrixBase::stableNorm() const RealScalar invScale = 1; RealScalar ssq = 0; // sum of square enum { - Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? EnforceAlignedAccess : AsRequested + Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 
1 : 0 }; int n = size(); int bi=0; @@ -70,7 +70,7 @@ MatrixBase::stableNorm() const ei_stable_norm_kernel(start(bi), ssq, scale, invScale); } for (; bi(derived(),bi,std::min(blockSize, n - bi)), ssq, scale, invScale); + ei_stable_norm_kernel(segment(bi,std::min(blockSize, n - bi)).template forceAlignedAccessIf(), ssq, scale, invScale); return scale * ei_sqrt(ssq); } diff --git a/Eigen/src/Core/VectorBlock.h b/Eigen/src/Core/VectorBlock.h index 65268b626..f3e4debf5 100644 --- a/Eigen/src/Core/VectorBlock.h +++ b/Eigen/src/Core/VectorBlock.h @@ -32,10 +32,6 @@ * * \param VectorType the type of the object in which we are taking a sub-vector * \param Size size of the sub-vector we are taking at compile time (optional) - * \param _PacketAccess allows to enforce aligned loads and stores if set to ForceAligned. - * The default is AsRequested. This parameter is internaly used by Eigen - * in expressions such as \code mat.segment() += other; \endcode and most of - * the time this is the only way it is used. * * This class represents an expression of either a fixed-size or dynamic-size sub-vector. * It is the return type of MatrixBase::segment(int,int) and MatrixBase::segment(int) and @@ -59,25 +55,22 @@ * * \sa class Block, MatrixBase::segment(int,int,int,int), MatrixBase::segment(int,int) */ -template -struct ei_traits > +template +struct ei_traits > : public ei_traits::RowsAtCompileTime==1 ? 1 : Size, - ei_traits::ColsAtCompileTime==1 ? 1 : Size, - _PacketAccess> > + ei_traits::ColsAtCompileTime==1 ? 1 : Size> > { }; -template class VectorBlock +template class VectorBlock : public Block::RowsAtCompileTime==1 ? 1 : Size, - ei_traits::ColsAtCompileTime==1 ? 1 : Size, - PacketAccess> + ei_traits::ColsAtCompileTime==1 ? 1 : Size> { typedef Block::RowsAtCompileTime==1 ? 1 : Size, - ei_traits::ColsAtCompileTime==1 ? 1 : Size, - PacketAccess> _Base; + ei_traits::ColsAtCompileTime==1 ? 
1 : Size> _Base; enum { IsColVector = ei_traits::ColsAtCompileTime==1 }; diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index c801d8049..ee2a3d27d 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -189,7 +189,6 @@ const unsigned int UnitUpperTriangular = UpperTriangularBit | UnitDiagBit; const unsigned int UnitLowerTriangular = LowerTriangularBit | UnitDiagBit; enum { Unaligned=0, Aligned=1 }; -enum { AsRequested=0, EnforceAlignedAccess=2 }; enum { ConditionalJumpCost = 5 }; enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight }; enum DirectionType { Vertical, Horizontal, BothDirections }; diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index fb0233591..76ac0eb3e 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -37,11 +37,12 @@ template class Flagged; template class StorageBase > class NoAlias; template class NestByValue; +template class ForceAlignedAccess; template class SwapWrapper; template class Minor; -template::Flags&DirectAccessBit) ? HasDirectAccess : NoDirectAccess> class Block; -template class VectorBlock; +template class VectorBlock; template class Transpose; template class Conjugate; template class CwiseNullaryOp; @@ -57,7 +58,7 @@ template class DiagonalProduct; template class Diagonal; -template class Map; +template class Map; template class TriangularBase; template class TriangularView; template class SelfAdjointView;