From 8169c6ac59ce78a7340ebae40f8eabe9df1c8f62 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 18 Feb 2014 16:57:25 +0100 Subject: [PATCH] Simplify implementation of coeff-based products to fully exploit our reduxion mechanisms. If this results in performance regressions, then we should optimize reduxion rather than somehow duplicate the code. --- Eigen/src/Core/ProductEvaluators.h | 180 +++++------------------------ 1 file changed, 27 insertions(+), 153 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 113bcd618..67d0dc80c 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -333,8 +333,11 @@ struct product_evaluator, ProductTag, DenseShape, typedef CoeffBasedProduct CoeffBasedProductType; product_evaluator(const XprType& xpr) - : m_lhsImpl(xpr.lhs()), - m_rhsImpl(xpr.rhs()), + : m_lhs(xpr.lhs()), + m_rhs(xpr.rhs()), + m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! + m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, + // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. m_innerDim(xpr.lhs().cols()) { } @@ -355,31 +358,32 @@ struct product_evaluator, ProductTag, DenseShape, CanVectorizeInner = traits::CanVectorizeInner, Flags = traits::Flags }; - - typedef typename evaluator::type LhsEtorType; - typedef typename evaluator::type RhsEtorType; - typedef etor_product_coeff_impl CoeffImpl; + typedef typename internal::nested_eval::type LhsNested; + typedef typename internal::nested_eval::type RhsNested; + + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + typedef typename evaluator::type LhsEtorType; + typedef typename evaluator::type RhsEtorType; + const CoeffReturnType coeff(Index row, Index col) const { - Scalar res; - CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); - return res; + // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, * which is why we don't set the LinearAccessBit. + * TODO: this seems possible when the result is a vector */ const CoeffReturnType coeff(Index index) const { - Scalar res; const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index col = RowsAtCompileTime == 1 ? index : 0; - CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); - return res; + // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } template @@ -389,13 +393,17 @@ struct product_evaluator, ProductTag, DenseShape, typedef etor_product_packet_impl PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; } protected: - typename evaluator::type m_lhsImpl; - typename evaluator::type m_rhsImpl; + const LhsNested m_lhs; + const RhsNested m_rhs; + + LhsEtorType m_lhsImpl; + RhsEtorType m_rhsImpl; // TODO: Get rid of m_innerDim if known at compile time Index m_innerDim; @@ -413,143 +421,9 @@ struct product_evaluator, LazyCoeffBasedProduc {} }; -/*************************************************************************** -* Normal product .coeff() implementation (with meta-unrolling) -***************************************************************************/ - -/************************************** -*** Scalar path - no vectorization *** -**************************************/ - -template -struct etor_product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) - { - etor_product_coeff_impl::run(row, col, lhs, rhs, innerDim, res); - res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); - } -}; - -template -struct etor_product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res) - { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - } -}; - -template -struct etor_product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res) - { - eigen_assert(innerDim>0 && "you are using a non initialized matrix"); - res = lhs.coeff(row, 0) * rhs.coeff(0, col); - for(Index i = 1; i < innerDim; ++i) - res += lhs.coeff(row, i) * rhs.coeff(i, col); - } -}; - -/******************************************* -*** Scalar path with inner vectorization *** -*******************************************/ - -template -struct etor_product_coeff_vectorized_unroller -{ - typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres) - { - etor_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, innerDim, pres); - pres = padd(pres, pmul( lhs.template packet(row, UnrollingIndex) , rhs.template packet(UnrollingIndex, col) )); - } -}; - -template -struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres) - { - pres = pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); - } -}; - -template -struct etor_product_coeff_impl -{ - typedef typename Lhs::PacketScalar Packet; - typedef typename Lhs::Index Index; - enum { PacketSize = packet_traits::size }; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) - { - Packet pres; - etor_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, innerDim, pres); - res = predux(pres); - } -}; - -template -struct etor_product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) - { - res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); - } -}; - -// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower -// NOTE maybe they are now useless since we have a specialization for Block -template -struct etor_product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) - { - res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); - } -}; - -template -struct etor_product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) - { - res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); - } -}; - -template -struct etor_product_coeff_vectorized_dyn_selector -{ - typedef typename Lhs::Index Index; - EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) - { - res = lhs.transpose().cwiseProduct(rhs).sum(); - } -}; - -template -struct etor_product_coeff_impl -{ - typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res) - { - etor_product_coeff_vectorized_dyn_selector::run(row, col, lhs, rhs, innerDim, res); - } -}; - -/******************* -*** Packet path *** -*******************/ +/**************************************** +*** Coeff based product, Packet path *** +****************************************/ template struct etor_product_packet_impl