diff --git a/Eigen/Core b/Eigen/Core index 0189654a6..046c4c910 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -359,6 +359,7 @@ using std::ptrdiff_t; #include "src/Core/Product.h" #include "src/Core/CoreEvaluators.h" #include "src/Core/AssignEvaluator.h" +#include "src/Core/ProductEvaluators.h" #endif #ifdef EIGEN_USE_BLAS diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index cc8477598..015e7d698 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -446,36 +446,6 @@ protected: typename evaluator::type m_argImpl; }; -// -------------------- Product -------------------- - -template -struct evaluator_impl > : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type evaluator_base; - -// enum { -// EvaluateLhs = ; -// EvaluateRhs = ; -// }; - - evaluator_impl(const XprType& product) : evaluator_base(m_result) - { - // here we process the left and right hand sides with a specialized evaluator - // perhaps this step should be done by the TreeOptimizer to get a canonical tree and reduce evaluator instanciations - // typename product_operand_evaluator::type m_lhsImpl(product.lhs()); - // typename product_operand_evaluator::type m_rhsImpl(product.rhs()); - - // TODO do not rely on previous product mechanism !! - m_result.resize(product.rows(), product.cols()); - m_result.noalias() = product.lhs() * product.rhs(); - } - -protected: - PlainObject m_result; -}; - // -------------------- Map -------------------- template diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index f7824aa80..6e66888c7 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -42,25 +42,16 @@ template class ProductImpl; * */ +// Use ProductReturnType to get correct traits, in particular vectorization flags namespace internal { template struct traits > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all::type LhsCleaned; - typedef typename remove_all::type RhsCleaned; - typedef typename scalar_product_traits::Scalar, typename traits::Scalar>::ReturnType Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; + : traits::Type> +{ + // We want A+B*C to be of type Product and not Product + // TODO: This flag should eventually go in a separate evaluator traits class enum { - RowsAtCompileTime = LhsCleaned::RowsAtCompileTime, - ColsAtCompileTime = RhsCleaned::ColsAtCompileTime, - MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime, - MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime, - Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0), // TODO should be no storage order - CoeffReadCost = 0 // TODO CoeffReadCost should not be part of the expression traits + Flags = traits::Type>::Flags & ~EvalBeforeNestingBit }; }; } // end namespace internal diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h new file mode 100644 index 000000000..aadaa9303 --- /dev/null +++ b/Eigen/src/Core/ProductEvaluators.h @@ -0,0 +1,394 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2011 Jitse Niesen +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + + +#ifndef EIGEN_PRODUCTEVALUATORS_H +#define EIGEN_PRODUCTEVALUATORS_H + +namespace Eigen { + +namespace internal { + +// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or +// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic +// in ProductReturnType to decide. + +template +struct product_evaluator_dispatcher; + +template +struct evaluator_impl > + : product_evaluator_dispatcher, typename ProductReturnType::Type> +{ + typedef Product XprType; + typedef product_evaluator_dispatcher::Type> Base; + + evaluator_impl(const XprType& xpr) : Base(xpr) + { } +}; + +// Case 1: Evaluate all at once +// +// We can view the GeneralProduct class as a part of the product evaluator. +// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct. +// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case. + +template +struct product_evaluator_dispatcher, GeneralProduct > + : public evaluator::PlainObject>::type +{ + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator::type evaluator_base; + + product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result) + { + m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum(); + } + +protected: + PlainObject m_result; +}; + +// For the other three subcases, simply call the evalTo() method of GeneralProduct +// TODO: GeneralProduct should take evaluators, not expression objects. + +template +struct product_evaluator_dispatcher, GeneralProduct > + : public evaluator::PlainObject>::type +{ + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator::type evaluator_base; + + product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result) + { + m_result.resize(xpr.rows(), xpr.cols()); + GeneralProduct(xpr.lhs(), xpr.rhs()).evalTo(m_result); + } + +protected: + PlainObject m_result; +}; + +// Case 2: Evaluate coeff by coeff +// +// This is mostly taken from CoeffBasedProduct.h +// The main difference is that we add an extra argument to the etor_product_*_impl::run() function +// for the inner dimension of the product, because evaluator object do not know their size. + +template +struct etor_product_coeff_impl; + +template +struct etor_product_packet_impl; + +template +struct product_evaluator_dispatcher, CoeffBasedProduct > + : evaluator_impl_base > +{ + typedef Product XprType; + typedef CoeffBasedProduct CoeffBasedProductType; + + product_evaluator_dispatcher(const XprType& xpr) + : m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()), + m_innerDim(xpr.lhs().cols()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + // Everything below here is taken from CoeffBasedProduct.h + + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + PacketSize = packet_traits::size, + InnerSize = traits::InnerSize, + CoeffReadCost = traits::CoeffReadCost, + Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + CanVectorizeInner = traits::CanVectorizeInner + }; + + typedef typename evaluator::type LhsEtorType; + typedef typename evaluator::type RhsEtorType; + typedef etor_product_coeff_impl CoeffImpl; + + const CoeffReturnType coeff(Index row, Index col) const + { + Scalar res; + CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + */ + const CoeffReturnType coeff(Index index) const + { + Scalar res; + const Index row = RowsAtCompileTime == 1 ? 0 : index; + const Index col = RowsAtCompileTime == 1 ? index : 0; + CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + template + const PacketReturnType packet(Index row, Index col) const + { + PacketScalar res; + typedef etor_product_packet_impl PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + +protected: + typename evaluator::type m_lhsImpl; + typename evaluator::type m_rhsImpl; + + // TODO: Get rid of m_innerDim if known at compile time + Index m_innerDim; +}; + +/*************************************************************************** +* Normal product .coeff() implementation (with meta-unrolling) +***************************************************************************/ + +/************************************** +*** Scalar path - no vectorization *** +**************************************/ + +template +struct etor_product_coeff_impl +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) + { + etor_product_coeff_impl::run(row, col, lhs, rhs, innerDim, res); + res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); + } +}; + +template +struct etor_product_coeff_impl +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + } +}; + +template +struct etor_product_coeff_impl +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res) + { + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = lhs.coeff(row, 0) * rhs.coeff(0, col); + for(Index i = 1; i < innerDim; ++i) + res += lhs.coeff(row, i) * rhs.coeff(i, col); + } +}; + +/******************************************* +*** Scalar path with inner vectorization *** +*******************************************/ + +template +struct etor_product_coeff_vectorized_unroller +{ + typedef typename Lhs::Index Index; + enum { PacketSize = packet_traits::size }; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres) + { + etor_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, innerDim, pres); + pres = padd(pres, pmul( lhs.template packet(row, UnrollingIndex) , rhs.template packet(UnrollingIndex, col) )); + } +}; + +template +struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres) + { + pres = pmul(lhs.template packet(row, 0) , rhs.template packet(0, col)); + } +}; + +template +struct etor_product_coeff_impl +{ + typedef typename Lhs::PacketScalar Packet; + typedef typename Lhs::Index Index; + enum { PacketSize = packet_traits::size }; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res) + { + Packet pres; + etor_product_coeff_vectorized_unroller::run(row, col, lhs, rhs, innerDim, pres); + etor_product_coeff_impl::run(row, col, lhs, rhs, innerDim, res); + res = predux(pres); + } +}; + +template +struct etor_product_coeff_vectorized_dyn_selector +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum(); + } +}; + +// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower +// NOTE maybe they are now useless since we have a specialization for Block +template +struct etor_product_coeff_vectorized_dyn_selector +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.transpose().cwiseProduct(rhs.col(col)).sum(); + } +}; + +template +struct etor_product_coeff_vectorized_dyn_selector +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.row(row).transpose().cwiseProduct(rhs).sum(); + } +}; + +template +struct etor_product_coeff_vectorized_dyn_selector +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res) + { + res = lhs.transpose().cwiseProduct(rhs).sum(); + } +}; + +template +struct etor_product_coeff_impl +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res) + { + etor_product_coeff_vectorized_dyn_selector::run(row, col, lhs, rhs, innerDim, res); + } +}; + +/******************* +*** Packet path *** +*******************/ + +template +struct etor_product_packet_impl +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(pset1(lhs.coeff(row, UnrollingIndex)), rhs.template packet(UnrollingIndex, col), res); + } +}; + +template +struct etor_product_packet_impl +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(lhs.template packet(row, UnrollingIndex), pset1(rhs.coeff(UnrollingIndex, col)), res); + } +}; + +template +struct etor_product_packet_impl +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); + } +}; + +template +struct etor_product_packet_impl +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); + } +}; + +template +struct etor_product_packet_impl +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); + for(Index i = 1; i < innerDim; ++i) + res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); + } +}; + +template +struct etor_product_packet_impl +{ + typedef typename Lhs::Index Index; + EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + eigen_assert(innerDim>0 && "you are using a non initialized matrix"); + res = pmul(lhs.template packet(row, 0), pset1(rhs.coeff(0, col))); + for(Index i = 1; i < innerDim; ++i) + res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_EVALUATORS_H diff --git a/test/evaluators.cpp b/test/evaluators.cpp index 267509c91..a95b5319a 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -50,6 +50,7 @@ void test_evaluators() VERIFY_IS_APPROX_EVALUATOR(w, Vector2d::Zero().transpose()); { + // test product expressions int s = internal::random(1,100); MatrixXf a(s,s), b(s,s), c(s,s), d(s,s); a.setRandom(); @@ -58,13 +59,55 @@ void test_evaluators() d.setRandom(); VERIFY_IS_APPROX_EVALUATOR(d, (a + b)); VERIFY_IS_APPROX_EVALUATOR(d, (a + b).transpose()); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b), a*b); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b) + c, a*b + c); + VERIFY_IS_APPROX_EVALUATOR2(d, s * prod(a,b), s * a*b); VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b).transpose(), (a*b).transpose()); VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b) + prod(b,c), a*b + b*c); - -// copy_using_evaluator(d, a.transpose() + (a.transpose() * (b+b))); -// cout << d << endl; } + { + // test product with all possible sizes + int s = internal::random(1,100); + Matrix m11, res11; m11.setRandom(1,1); + Matrix m14, res14; m14.setRandom(1,4); + Matrix m1X, res1X; m1X.setRandom(1,s); + Matrix m41, res41; m41.setRandom(4,1); + Matrix m44, res44; m44.setRandom(4,4); + Matrix m4X, res4X; m4X.setRandom(4,s); + Matrix mX1, resX1; mX1.setRandom(s,1); + Matrix mX4, resX4; mX4.setRandom(s,4); + Matrix mXX, resXX; mXX.setRandom(s,s); + + VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m11,m11), m11*m11); + VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m14,m41), m14*m41); + VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m1X,mX1), m1X*mX1); + VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m11,m14), m11*m14); + VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m14,m44), m14*m44); + VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m1X,mX4), m1X*mX4); + VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m11,m1X), m11*m1X); + VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m14,m4X), m14*m4X); + VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m1X,mXX), m1X*mXX); + VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m41,m11), m41*m11); + VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m44,m41), m44*m41); + VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m4X,mX1), m4X*mX1); + VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m41,m14), m41*m14); + VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m44,m44), m44*m44); + VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m4X,mX4), m4X*mX4); + VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m41,m1X), m41*m1X); + VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m44,m4X), m44*m4X); + VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m4X,mXX), m4X*mXX); + VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mX1,m11), mX1*m11); + VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mX4,m41), mX4*m41); + VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mXX,mX1), mXX*mX1); + VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mX1,m14), mX1*m14); + VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mX4,m44), mX4*m44); + VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mXX,mX4), mXX*mX4); + VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mX1,m1X), mX1*m1X); + VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mX4,m4X), mX4*m4X); + VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mXX,mXX), mXX*mXX); + } + // this does not work because Random is eval-before-nested: // copy_using_evaluator(w, Vector2d::Random().transpose());