* fix multiple temporary copies for coeff based products

* introduce a lazy product version of the coefficient based implementation
  => flagged is not used anymore
  => small outer products are now lazy by default (aliasing is really unlikely for outer products)
This commit is contained in:
Gael Guennebaud 2010-02-09 11:05:39 +01:00
parent 0398e21198
commit 5686eca7b1
8 changed files with 75 additions and 49 deletions

View File

@ -205,7 +205,7 @@ struct Dense {};
#include "src/Core/TriangularMatrix.h" #include "src/Core/TriangularMatrix.h"
#include "src/Core/SelfAdjointView.h" #include "src/Core/SelfAdjointView.h"
#include "src/Core/SolveTriangular.h" #include "src/Core/SolveTriangular.h"
#include "src/Core/products/GeneralUnrolled.h" #include "src/Core/products/CoeffBasedProduct.h"
#include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/GeneralBlockPanelKernel.h"
#include "src/Core/products/GeneralMatrixVector.h" #include "src/Core/products/GeneralMatrixVector.h"
#include "src/Core/products/GeneralMatrixMatrix.h" #include "src/Core/products/GeneralMatrixMatrix.h"

View File

@ -70,13 +70,13 @@ class NoAlias
EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other) EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{ other.derived().subTo(m_expression); return m_expression; } { other.derived().subTo(m_expression); return m_expression; }
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs, int NestingFlags>
EIGEN_STRONG_INLINE ExpressionType& operator+=(const GeneralProduct<Lhs,Rhs,CoeffBasedProduct>& other) EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
{ return m_expression.derived() += other.template flagged<0,EvalBeforeAssigningBit|EvalBeforeNestingBit>(); } { return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,0>(other.lhs(), other.rhs()); }
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs, int NestingFlags>
EIGEN_STRONG_INLINE ExpressionType& operator-=(const GeneralProduct<Lhs,Rhs,CoeffBasedProduct>& other) EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
{ return m_expression.derived() -= other.template flagged<0,EvalBeforeAssigningBit|EvalBeforeNestingBit>(); } { return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,0>(other.lhs(), other.rhs()); }
#endif #endif
protected: protected:

View File

@ -87,12 +87,12 @@ public:
template<int Rows, int Cols> struct ei_product_type_selector<Rows, Cols, 1> { enum { ret = OuterProduct }; }; template<int Rows, int Cols> struct ei_product_type_selector<Rows, Cols, 1> { enum { ret = OuterProduct }; };
template<int Depth> struct ei_product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; }; template<int Depth> struct ei_product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
template<> struct ei_product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; }; template<> struct ei_product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
template<> struct ei_product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProduct }; }; template<> struct ei_product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProduct }; }; template<> struct ei_product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProduct }; }; template<> struct ei_product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small, Small, 1> { enum { ret = CoeffBasedProduct }; }; template<> struct ei_product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Small, Large, 1> { enum { ret = CoeffBasedProduct }; }; template<> struct ei_product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<Large, Small, 1> { enum { ret = CoeffBasedProduct }; }; template<> struct ei_product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = GemvProduct }; }; template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = GemvProduct }; };
template<> struct ei_product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; }; template<> struct ei_product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = GemvProduct }; }; template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = GemvProduct }; };
@ -134,11 +134,19 @@ struct ProductReturnType
}; };
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,CoeffBasedProduct> struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
{ {
typedef typename ei_nested<Lhs, Rhs::ColsAtCompileTime, typename ei_plain_matrix_type<Lhs>::type >::type LhsNested; typedef typename ei_nested<Lhs, Rhs::ColsAtCompileTime, typename ei_plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename ei_nested<Rhs, Lhs::RowsAtCompileTime, typename ei_plain_matrix_type<Rhs>::type >::type RhsNested; typedef typename ei_nested<Rhs, Lhs::RowsAtCompileTime, typename ei_plain_matrix_type<Rhs>::type >::type RhsNested;
typedef GeneralProduct<LhsNested, RhsNested, CoeffBasedProduct> Type; typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
};
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
{
typedef typename ei_nested<Lhs, Rhs::ColsAtCompileTime, typename ei_plain_matrix_type<Lhs>::type >::type LhsNested;
typedef typename ei_nested<Rhs, Lhs::RowsAtCompileTime, typename ei_plain_matrix_type<Rhs>::type >::type RhsNested;
typedef CoeffBasedProduct<LhsNested, RhsNested, 0> Type;
}; };

View File

@ -83,8 +83,7 @@ class ProductBase : public MatrixBase<Derived>
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename ei_cleantype<ActualRhsType>::type _ActualRhsType; typedef typename ei_cleantype<ActualRhsType>::type _ActualRhsType;
typedef typename ProductReturnType<Lhs,Rhs,CoeffBasedProduct>::Type CoeffBaseProductType; typedef typename ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>::Type LazyCoeffBaseProductType;
typedef Flagged<CoeffBaseProductType,0,EvalBeforeNestingBit> LazyCoeffBaseProductType;
public: public:
typedef typename Base::PlainMatrixType PlainMatrixType; typedef typename Base::PlainMatrixType PlainMatrixType;
@ -112,31 +111,28 @@ class ProductBase : public MatrixBase<Derived>
template<typename Dest> template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); } inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
EIGEN_DEPRECATED const Flagged<ProductBase, 0, EvalBeforeAssigningBit> lazy() const
{ return *this; }
const _LhsNested& lhs() const { return m_lhs; } const _LhsNested& lhs() const { return m_lhs; }
const _RhsNested& rhs() const { return m_rhs; } const _RhsNested& rhs() const { return m_rhs; }
const Diagonal<LazyCoeffBaseProductType,0> diagonal() const const Diagonal<LazyCoeffBaseProductType,0> diagonal() const
{ return Diagonal<LazyCoeffBaseProductType,0>(CoeffBaseProductType(m_lhs, m_rhs)); } { return LazyCoeffBaseProductType(m_lhs, m_rhs); }
Diagonal<LazyCoeffBaseProductType,0> diagonal() Diagonal<LazyCoeffBaseProductType,0> diagonal()
{ return Diagonal<LazyCoeffBaseProductType,0>(CoeffBaseProductType(m_lhs, m_rhs)); } { return LazyCoeffBaseProductType(m_lhs, m_rhs); }
template<int Index> template<int Index>
const Diagonal<LazyCoeffBaseProductType,Index> diagonal() const const Diagonal<LazyCoeffBaseProductType,Index> diagonal() const
{ return Diagonal<LazyCoeffBaseProductType,Index>(CoeffBaseProductType(m_lhs, m_rhs)); } { return LazyCoeffBaseProductType(m_lhs, m_rhs); }
template<int Index> template<int Index>
Diagonal<LazyCoeffBaseProductType,Index> diagonal() Diagonal<LazyCoeffBaseProductType,Index> diagonal()
{ return Diagonal<LazyCoeffBaseProductType,Index>(CoeffBaseProductType(m_lhs, m_rhs)); } { return LazyCoeffBaseProductType(m_lhs, m_rhs); }
const Diagonal<LazyCoeffBaseProductType,Dynamic> diagonal(int index) const const Diagonal<LazyCoeffBaseProductType,Dynamic> diagonal(int index) const
{ return Diagonal<LazyCoeffBaseProductType,Dynamic>(LazyCoeffBaseProductType(CoeffBaseProductType(m_lhs, m_rhs))).diagonal(index); } { return LazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
Diagonal<LazyCoeffBaseProductType,Dynamic> diagonal(int index) Diagonal<LazyCoeffBaseProductType,Dynamic> diagonal(int index)
{ return Diagonal<LazyCoeffBaseProductType,Dynamic>(LazyCoeffBaseProductType(CoeffBaseProductType(m_lhs, m_rhs))).diagonal(index); } { return LazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
// Implicit convertion to the nested type (trigger the evaluation of the product) // Implicit convertion to the nested type (trigger the evaluation of the product)
operator const PlainMatrixType& () const operator const PlainMatrixType& () const

View File

@ -23,11 +23,14 @@
// License and a copy of the GNU General Public License along with // License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>. // Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_GENERAL_UNROLLED_PRODUCT_H #ifndef EIGEN_COEFFBASED_PRODUCT_H
#define EIGEN_GENERAL_UNROLLED_PRODUCT_H #define EIGEN_COEFFBASED_PRODUCT_H
/********************************************************************************* /*********************************************************************************
* Specialization of GeneralProduct<> for products with small fixed sizes * Coefficient based product implementation.
* It is designed for the following use cases:
* - small fixed sizes
* - lazy products
*********************************************************************************/ *********************************************************************************/
/* Since the all the dimensions of the product are small, here we can rely /* Since the all the dimensions of the product are small, here we can rely
@ -42,8 +45,8 @@ struct ei_product_coeff_impl;
template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode> template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
struct ei_product_packet_impl; struct ei_product_packet_impl;
template<typename LhsNested, typename RhsNested> template<typename LhsNested, typename RhsNested, int NestingFlags>
struct ei_traits<GeneralProduct<LhsNested,RhsNested,CoeffBasedProduct> > struct ei_traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
{ {
typedef DenseStorageMatrix DenseStorageType; typedef DenseStorageMatrix DenseStorageType;
typedef typename ei_cleantype<LhsNested>::type _LhsNested; typedef typename ei_cleantype<LhsNested>::type _LhsNested;
@ -79,14 +82,13 @@ struct ei_traits<GeneralProduct<LhsNested,RhsNested,CoeffBasedProduct> >
RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit), RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit),
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
| EvalBeforeAssigningBit | NestingFlags
| EvalBeforeNestingBit
| (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0) | (CanVectorizeLhs || CanVectorizeRhs ? PacketAccessBit : 0)
| (LhsFlags & RhsFlags & AlignedBit), | (LhsFlags & RhsFlags & AlignedBit),
CoeffReadCost = InnerSize == Dynamic ? Dynamic CoeffReadCost = 1000,//InnerSize == Dynamic ? Dynamic
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) // : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost, // + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
@ -98,25 +100,27 @@ struct ei_traits<GeneralProduct<LhsNested,RhsNested,CoeffBasedProduct> >
}; };
}; };
template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested,RhsNested,CoeffBasedProduct> template<typename LhsNested, typename RhsNested, int NestingFlags>
class CoeffBasedProduct
: ei_no_assignment_operator, : ei_no_assignment_operator,
public MatrixBase<GeneralProduct<LhsNested, RhsNested, CoeffBasedProduct> > public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
{ {
public: public:
typedef MatrixBase<GeneralProduct> Base; typedef MatrixBase<CoeffBasedProduct> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(GeneralProduct) EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
typedef typename Base::PlainMatrixType PlainMatrixType;
private: private:
typedef typename ei_traits<GeneralProduct>::_LhsNested _LhsNested; typedef typename ei_traits<CoeffBasedProduct>::_LhsNested _LhsNested;
typedef typename ei_traits<GeneralProduct>::_RhsNested _RhsNested; typedef typename ei_traits<CoeffBasedProduct>::_RhsNested _RhsNested;
enum { enum {
PacketSize = ei_packet_traits<Scalar>::size, PacketSize = ei_packet_traits<Scalar>::size,
InnerSize = ei_traits<GeneralProduct>::InnerSize, InnerSize = ei_traits<CoeffBasedProduct>::InnerSize,
Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
CanVectorizeInner = ei_traits<GeneralProduct>::CanVectorizeInner CanVectorizeInner = ei_traits<CoeffBasedProduct>::CanVectorizeInner
}; };
typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal, typedef ei_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
@ -126,7 +130,7 @@ template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested,
public: public:
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
inline GeneralProduct(const Lhs& lhs, const Rhs& rhs) inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs) : m_lhs(lhs), m_rhs(rhs)
{ {
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable. // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
@ -171,11 +175,27 @@ template<typename LhsNested, typename RhsNested> class GeneralProduct<LhsNested,
return res; return res;
} }
// Implicit convertion to the nested type (trigger the evaluation of the product)
operator const PlainMatrixType& () const
{
m_result = *this;
return m_result;
}
protected: protected:
const LhsNested m_lhs; const LhsNested m_lhs;
const RhsNested m_rhs; const RhsNested m_rhs;
mutable PlainMatrixType m_result;
}; };
// here we need to overload the nested rule for products
// such that the nested type is a const reference to a plain matrix
template<typename Lhs, typename Rhs, int N, typename PlainMatrixType>
struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainMatrixType>
{
typedef PlainMatrixType const& type;
};
/*************************************************************************** /***************************************************************************
* Normal product .coeff() implementation (with meta-unrolling) * Normal product .coeff() implementation (with meta-unrolling)
@ -386,4 +406,4 @@ struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMod
} }
}; };
#endif // EIGEN_GENERAL_UNROLLED_PRODUCT_H #endif // EIGEN_COEFFBASED_PRODUCT_H

View File

@ -269,6 +269,6 @@ namespace Architecture
enum DenseStorageMatrix {}; enum DenseStorageMatrix {};
enum DenseStorageArray {}; enum DenseStorageArray {};
enum { OuterProduct, InnerProduct, CoeffBasedProduct, GemvProduct, GemmProduct }; enum { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
#endif // EIGEN_CONSTANTS_H #endif // EIGEN_CONSTANTS_H

View File

@ -52,6 +52,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename BinOp, typename MatrixType> class SelfCwiseBinaryOp; template<typename BinOp, typename MatrixType> class SelfCwiseBinaryOp;
template<typename Derived, typename Lhs, typename Rhs> class ProductBase; template<typename Derived, typename Lhs, typename Rhs> class ProductBase;
template<typename Lhs, typename Rhs, int Mode> class GeneralProduct; template<typename Lhs, typename Rhs, int Mode> class GeneralProduct;
template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct;
template<typename Derived> class DiagonalBase; template<typename Derived> class DiagonalBase;
template<typename _DiagonalVectorType> class DiagonalWrapper; template<typename _DiagonalVectorType> class DiagonalWrapper;

View File

@ -61,6 +61,7 @@
THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
YOU_MADE_A_PROGRAMMING_MISTAKE, YOU_MADE_A_PROGRAMMING_MISTAKE,
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR, YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR, YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC, UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,