mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-12 09:23:12 +08:00
Move (part of) evaluation of products to evaluator objects.
* Copy implementation from CoeffBasedProduct. * Copy implementation from GeneralProduct in InnerProduct case. * For GeneralProduct in other cases, call the evalTo() member function with expression objects in constructor of evaluator.
This commit is contained in:
parent
9629ba361a
commit
069fd0e4be
@ -359,6 +359,7 @@ using std::ptrdiff_t;
|
|||||||
#include "src/Core/Product.h"
|
#include "src/Core/Product.h"
|
||||||
#include "src/Core/CoreEvaluators.h"
|
#include "src/Core/CoreEvaluators.h"
|
||||||
#include "src/Core/AssignEvaluator.h"
|
#include "src/Core/AssignEvaluator.h"
|
||||||
|
#include "src/Core/ProductEvaluators.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef EIGEN_USE_BLAS
|
#ifdef EIGEN_USE_BLAS
|
||||||
|
@ -446,36 +446,6 @@ protected:
|
|||||||
typename evaluator<ArgType>::type m_argImpl;
|
typename evaluator<ArgType>::type m_argImpl;
|
||||||
};
|
};
|
||||||
|
|
||||||
// -------------------- Product --------------------
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs>
|
|
||||||
struct evaluator_impl<Product<Lhs,Rhs> > : public evaluator<typename Product<Lhs,Rhs>::PlainObject>::type
|
|
||||||
{
|
|
||||||
typedef Product<Lhs,Rhs> XprType;
|
|
||||||
typedef typename XprType::PlainObject PlainObject;
|
|
||||||
typedef typename evaluator<PlainObject>::type evaluator_base;
|
|
||||||
|
|
||||||
// enum {
|
|
||||||
// EvaluateLhs = ;
|
|
||||||
// EvaluateRhs = ;
|
|
||||||
// };
|
|
||||||
|
|
||||||
evaluator_impl(const XprType& product) : evaluator_base(m_result)
|
|
||||||
{
|
|
||||||
// here we process the left and right hand sides with a specialized evaluator
|
|
||||||
// perhaps this step should be done by the TreeOptimizer to get a canonical tree and reduce evaluator instanciations
|
|
||||||
// typename product_operand_evaluator<Lhs>::type m_lhsImpl(product.lhs());
|
|
||||||
// typename product_operand_evaluator<Rhs>::type m_rhsImpl(product.rhs());
|
|
||||||
|
|
||||||
// TODO do not rely on previous product mechanism !!
|
|
||||||
m_result.resize(product.rows(), product.cols());
|
|
||||||
m_result.noalias() = product.lhs() * product.rhs();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
PlainObject m_result;
|
|
||||||
};
|
|
||||||
|
|
||||||
// -------------------- Map --------------------
|
// -------------------- Map --------------------
|
||||||
|
|
||||||
template<typename Derived, int AccessorsType>
|
template<typename Derived, int AccessorsType>
|
||||||
|
@ -42,25 +42,16 @@ template<typename Lhs, typename Rhs, typename StorageKind> class ProductImpl;
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// Use ProductReturnType to get correct traits, in particular vectorization flags
|
||||||
namespace internal {
|
namespace internal {
|
||||||
template<typename Lhs, typename Rhs>
|
template<typename Lhs, typename Rhs>
|
||||||
struct traits<Product<Lhs, Rhs> >
|
struct traits<Product<Lhs, Rhs> >
|
||||||
{
|
: traits<typename ProductReturnType<Lhs, Rhs>::Type>
|
||||||
typedef MatrixXpr XprKind;
|
{
|
||||||
typedef typename remove_all<Lhs>::type LhsCleaned;
|
// We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix>
|
||||||
typedef typename remove_all<Rhs>::type RhsCleaned;
|
// TODO: This flag should eventually go in a separate evaluator traits class
|
||||||
typedef typename scalar_product_traits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
|
|
||||||
typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind,
|
|
||||||
typename traits<RhsCleaned>::StorageKind>::ret StorageKind;
|
|
||||||
typedef typename promote_index_type<typename traits<LhsCleaned>::Index,
|
|
||||||
typename traits<RhsCleaned>::Index>::type Index;
|
|
||||||
enum {
|
enum {
|
||||||
RowsAtCompileTime = LhsCleaned::RowsAtCompileTime,
|
Flags = traits<typename ProductReturnType<Lhs, Rhs>::Type>::Flags & ~EvalBeforeNestingBit
|
||||||
ColsAtCompileTime = RhsCleaned::ColsAtCompileTime,
|
|
||||||
MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime,
|
|
||||||
MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime,
|
|
||||||
Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0), // TODO should be no storage order
|
|
||||||
CoeffReadCost = 0 // TODO CoeffReadCost should not be part of the expression traits
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
394
Eigen/src/Core/ProductEvaluators.h
Normal file
394
Eigen/src/Core/ProductEvaluators.h
Normal file
@ -0,0 +1,394 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||||
|
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||||
|
// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
|
||||||
|
//
|
||||||
|
// Eigen is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU Lesser General Public
|
||||||
|
// License as published by the Free Software Foundation; either
|
||||||
|
// version 3 of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Alternatively, you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License as
|
||||||
|
// published by the Free Software Foundation; either version 2 of
|
||||||
|
// the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public
|
||||||
|
// License and a copy of the GNU General Public License along with
|
||||||
|
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef EIGEN_PRODUCTEVALUATORS_H
|
||||||
|
#define EIGEN_PRODUCTEVALUATORS_H
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
// We can evaluate the product either all at once, like GeneralProduct and its evalTo() function, or
|
||||||
|
// traverse the matrix coefficient by coefficient, like CoeffBasedProduct. Use the existing logic
|
||||||
|
// in ProductReturnType to decide.
|
||||||
|
|
||||||
|
template<typename XprType, typename ProductType>
|
||||||
|
struct product_evaluator_dispatcher;
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs>
|
||||||
|
struct evaluator_impl<Product<Lhs, Rhs> >
|
||||||
|
: product_evaluator_dispatcher<Product<Lhs, Rhs>, typename ProductReturnType<Lhs, Rhs>::Type>
|
||||||
|
{
|
||||||
|
typedef Product<Lhs, Rhs> XprType;
|
||||||
|
typedef product_evaluator_dispatcher<XprType, typename ProductReturnType<Lhs, Rhs>::Type> Base;
|
||||||
|
|
||||||
|
evaluator_impl(const XprType& xpr) : Base(xpr)
|
||||||
|
{ }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Case 1: Evaluate all at once
|
||||||
|
//
|
||||||
|
// We can view the GeneralProduct class as a part of the product evaluator.
|
||||||
|
// Four sub-cases: InnerProduct, OuterProduct, GemmProduct and GemvProduct.
|
||||||
|
// InnerProduct is special because GeneralProduct does not have an evalTo() method in this case.
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs>
|
||||||
|
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, InnerProduct> >
|
||||||
|
: public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type
|
||||||
|
{
|
||||||
|
typedef Product<Lhs, Rhs> XprType;
|
||||||
|
typedef typename XprType::PlainObject PlainObject;
|
||||||
|
typedef typename evaluator<PlainObject>::type evaluator_base;
|
||||||
|
|
||||||
|
product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result)
|
||||||
|
{
|
||||||
|
m_result.coeffRef(0,0) = (xpr.lhs().transpose().cwiseProduct(xpr.rhs())).sum();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
PlainObject m_result;
|
||||||
|
};
|
||||||
|
|
||||||
|
// For the other three subcases, simply call the evalTo() method of GeneralProduct
|
||||||
|
// TODO: GeneralProduct should take evaluators, not expression objects.
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, int ProductType>
|
||||||
|
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, GeneralProduct<Lhs, Rhs, ProductType> >
|
||||||
|
: public evaluator<typename Product<Lhs, Rhs>::PlainObject>::type
|
||||||
|
{
|
||||||
|
typedef Product<Lhs, Rhs> XprType;
|
||||||
|
typedef typename XprType::PlainObject PlainObject;
|
||||||
|
typedef typename evaluator<PlainObject>::type evaluator_base;
|
||||||
|
|
||||||
|
product_evaluator_dispatcher(const XprType& xpr) : evaluator_base(m_result)
|
||||||
|
{
|
||||||
|
m_result.resize(xpr.rows(), xpr.cols());
|
||||||
|
GeneralProduct<Lhs, Rhs, ProductType>(xpr.lhs(), xpr.rhs()).evalTo(m_result);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
PlainObject m_result;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Case 2: Evaluate coeff by coeff
|
||||||
|
//
|
||||||
|
// This is mostly taken from CoeffBasedProduct.h
|
||||||
|
// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
|
||||||
|
// for the inner dimension of the product, because evaluator object do not know their size.
|
||||||
|
|
||||||
|
template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||||
|
struct etor_product_coeff_impl;
|
||||||
|
|
||||||
|
template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
|
struct etor_product_packet_impl;
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, typename LhsNested, typename RhsNested, int Flags>
|
||||||
|
struct product_evaluator_dispatcher<Product<Lhs, Rhs>, CoeffBasedProduct<LhsNested, RhsNested, Flags> >
|
||||||
|
: evaluator_impl_base<Product<Lhs, Rhs> >
|
||||||
|
{
|
||||||
|
typedef Product<Lhs, Rhs> XprType;
|
||||||
|
typedef CoeffBasedProduct<LhsNested, RhsNested, Flags> CoeffBasedProductType;
|
||||||
|
|
||||||
|
product_evaluator_dispatcher(const XprType& xpr)
|
||||||
|
: m_lhsImpl(xpr.lhs()),
|
||||||
|
m_rhsImpl(xpr.rhs()),
|
||||||
|
m_innerDim(xpr.lhs().cols())
|
||||||
|
{ }
|
||||||
|
|
||||||
|
typedef typename XprType::Index Index;
|
||||||
|
typedef typename XprType::Scalar Scalar;
|
||||||
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||||
|
typedef typename XprType::PacketScalar PacketScalar;
|
||||||
|
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||||
|
|
||||||
|
// Everything below here is taken from CoeffBasedProduct.h
|
||||||
|
|
||||||
|
enum {
|
||||||
|
RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime,
|
||||||
|
PacketSize = packet_traits<Scalar>::size,
|
||||||
|
InnerSize = traits<CoeffBasedProductType>::InnerSize,
|
||||||
|
CoeffReadCost = traits<CoeffBasedProductType>::CoeffReadCost,
|
||||||
|
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
|
||||||
|
CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef typename evaluator<Lhs>::type LhsEtorType;
|
||||||
|
typedef typename evaluator<Rhs>::type RhsEtorType;
|
||||||
|
typedef etor_product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
|
||||||
|
Unroll ? InnerSize-1 : Dynamic,
|
||||||
|
LhsEtorType, RhsEtorType, Scalar> CoeffImpl;
|
||||||
|
|
||||||
|
const CoeffReturnType coeff(Index row, Index col) const
|
||||||
|
{
|
||||||
|
Scalar res;
|
||||||
|
CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
|
||||||
|
* which is why we don't set the LinearAccessBit.
|
||||||
|
*/
|
||||||
|
const CoeffReturnType coeff(Index index) const
|
||||||
|
{
|
||||||
|
Scalar res;
|
||||||
|
const Index row = RowsAtCompileTime == 1 ? 0 : index;
|
||||||
|
const Index col = RowsAtCompileTime == 1 ? index : 0;
|
||||||
|
CoeffImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int LoadMode>
|
||||||
|
const PacketReturnType packet(Index row, Index col) const
|
||||||
|
{
|
||||||
|
PacketScalar res;
|
||||||
|
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
|
||||||
|
Unroll ? InnerSize-1 : Dynamic,
|
||||||
|
LhsEtorType, RhsEtorType, PacketScalar, LoadMode> PacketImpl;
|
||||||
|
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
typename evaluator<Lhs>::type m_lhsImpl;
|
||||||
|
typename evaluator<Rhs>::type m_rhsImpl;
|
||||||
|
|
||||||
|
// TODO: Get rid of m_innerDim if known at compile time
|
||||||
|
Index m_innerDim;
|
||||||
|
};
|
||||||
|
|
||||||
|
/***************************************************************************
|
||||||
|
* Normal product .coeff() implementation (with meta-unrolling)
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/**************************************
|
||||||
|
*** Scalar path - no vectorization ***
|
||||||
|
**************************************/
|
||||||
|
|
||||||
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||||
|
struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
|
||||||
|
{
|
||||||
|
etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
|
||||||
|
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||||
|
struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
|
||||||
|
{
|
||||||
|
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||||
|
struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
|
||||||
|
{
|
||||||
|
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||||
|
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
|
||||||
|
for(Index i = 1; i < innerDim; ++i)
|
||||||
|
res += lhs.coeff(row, i) * rhs.coeff(i, col);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*******************************************
|
||||||
|
*** Scalar path with inner vectorization ***
|
||||||
|
*******************************************/
|
||||||
|
|
||||||
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
|
||||||
|
struct etor_product_coeff_vectorized_unroller
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
|
||||||
|
{
|
||||||
|
etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
|
||||||
|
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, typename Packet>
|
||||||
|
struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
|
||||||
|
{
|
||||||
|
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
|
||||||
|
struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::PacketScalar Packet;
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
|
||||||
|
{
|
||||||
|
Packet pres;
|
||||||
|
etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
|
||||||
|
etor_product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, innerDim, res);
|
||||||
|
res = predux(pres);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
|
||||||
|
struct etor_product_coeff_vectorized_dyn_selector
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||||
|
{
|
||||||
|
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
|
||||||
|
// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
|
||||||
|
template<typename Lhs, typename Rhs, int RhsCols>
|
||||||
|
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||||
|
{
|
||||||
|
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, int LhsRows>
|
||||||
|
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||||
|
{
|
||||||
|
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs>
|
||||||
|
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
|
||||||
|
{
|
||||||
|
res = lhs.transpose().cwiseProduct(rhs).sum();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, typename RetScalar>
|
||||||
|
struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
|
||||||
|
{
|
||||||
|
etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*******************
|
||||||
|
*** Packet path ***
|
||||||
|
*******************/
|
||||||
|
|
||||||
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
|
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||||
|
{
|
||||||
|
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||||
|
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
|
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
|
||||||
|
{
|
||||||
|
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
|
||||||
|
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
|
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||||
|
{
|
||||||
|
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
|
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
|
||||||
|
{
|
||||||
|
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
|
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||||
|
{
|
||||||
|
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||||
|
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
|
||||||
|
for(Index i = 1; i < innerDim; ++i)
|
||||||
|
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
|
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
|
||||||
|
{
|
||||||
|
typedef typename Lhs::Index Index;
|
||||||
|
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
|
||||||
|
{
|
||||||
|
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
|
||||||
|
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
|
||||||
|
for(Index i = 1; i < innerDim; ++i)
|
||||||
|
res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
#endif // EIGEN_PRODUCT_EVALUATORS_H
|
@ -50,6 +50,7 @@ void test_evaluators()
|
|||||||
VERIFY_IS_APPROX_EVALUATOR(w, Vector2d::Zero().transpose());
|
VERIFY_IS_APPROX_EVALUATOR(w, Vector2d::Zero().transpose());
|
||||||
|
|
||||||
{
|
{
|
||||||
|
// test product expressions
|
||||||
int s = internal::random<int>(1,100);
|
int s = internal::random<int>(1,100);
|
||||||
MatrixXf a(s,s), b(s,s), c(s,s), d(s,s);
|
MatrixXf a(s,s), b(s,s), c(s,s), d(s,s);
|
||||||
a.setRandom();
|
a.setRandom();
|
||||||
@ -58,13 +59,55 @@ void test_evaluators()
|
|||||||
d.setRandom();
|
d.setRandom();
|
||||||
VERIFY_IS_APPROX_EVALUATOR(d, (a + b));
|
VERIFY_IS_APPROX_EVALUATOR(d, (a + b));
|
||||||
VERIFY_IS_APPROX_EVALUATOR(d, (a + b).transpose());
|
VERIFY_IS_APPROX_EVALUATOR(d, (a + b).transpose());
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b), a*b);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b) + c, a*b + c);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(d, s * prod(a,b), s * a*b);
|
||||||
VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b).transpose(), (a*b).transpose());
|
VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b).transpose(), (a*b).transpose());
|
||||||
VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b) + prod(b,c), a*b + b*c);
|
VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b) + prod(b,c), a*b + b*c);
|
||||||
|
|
||||||
// copy_using_evaluator(d, a.transpose() + (a.transpose() * (b+b)));
|
|
||||||
// cout << d << endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// test product with all possible sizes
|
||||||
|
int s = internal::random<int>(1,100);
|
||||||
|
Matrix<float, 1, 1> m11, res11; m11.setRandom(1,1);
|
||||||
|
Matrix<float, 1, 4> m14, res14; m14.setRandom(1,4);
|
||||||
|
Matrix<float, 1,Dynamic> m1X, res1X; m1X.setRandom(1,s);
|
||||||
|
Matrix<float, 4, 1> m41, res41; m41.setRandom(4,1);
|
||||||
|
Matrix<float, 4, 4> m44, res44; m44.setRandom(4,4);
|
||||||
|
Matrix<float, 4,Dynamic> m4X, res4X; m4X.setRandom(4,s);
|
||||||
|
Matrix<float,Dynamic, 1> mX1, resX1; mX1.setRandom(s,1);
|
||||||
|
Matrix<float,Dynamic, 4> mX4, resX4; mX4.setRandom(s,4);
|
||||||
|
Matrix<float,Dynamic,Dynamic> mXX, resXX; mXX.setRandom(s,s);
|
||||||
|
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m11,m11), m11*m11);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m14,m41), m14*m41);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m1X,mX1), m1X*mX1);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m11,m14), m11*m14);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m14,m44), m14*m44);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m1X,mX4), m1X*mX4);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m11,m1X), m11*m1X);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m14,m4X), m14*m4X);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m1X,mXX), m1X*mXX);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m41,m11), m41*m11);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m44,m41), m44*m41);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m4X,mX1), m4X*mX1);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m41,m14), m41*m14);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m44,m44), m44*m44);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m4X,mX4), m4X*mX4);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m41,m1X), m41*m1X);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m44,m4X), m44*m4X);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m4X,mXX), m4X*mXX);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mX1,m11), mX1*m11);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mX4,m41), mX4*m41);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mXX,mX1), mXX*mX1);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mX1,m14), mX1*m14);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mX4,m44), mX4*m44);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mXX,mX4), mXX*mX4);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mX1,m1X), mX1*m1X);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mX4,m4X), mX4*m4X);
|
||||||
|
VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mXX,mXX), mXX*mXX);
|
||||||
|
}
|
||||||
|
|
||||||
// this does not work because Random is eval-before-nested:
|
// this does not work because Random is eval-before-nested:
|
||||||
// copy_using_evaluator(w, Vector2d::Random().transpose());
|
// copy_using_evaluator(w, Vector2d::Random().transpose());
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user