mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Add a *very efficient* evaluation path for both col-major matrix * vector
and vector * row-major products. Currently, it is enabled only is the matrix has DirectAccessBit flag and the product is "large enough". Added the respective unit tests in test/product/cpp.
This commit is contained in:
parent
6f71ef8277
commit
b7bd1b3446
@ -59,10 +59,7 @@ namespace Eigen {
|
|||||||
#include "src/Core/CommaInitializer.h"
|
#include "src/Core/CommaInitializer.h"
|
||||||
#include "src/Core/Extract.h"
|
#include "src/Core/Extract.h"
|
||||||
#include "src/Core/Part.h"
|
#include "src/Core/Part.h"
|
||||||
|
|
||||||
#ifndef EIGEN_EXTERN_INSTANTIATIONS
|
|
||||||
#include "src/Core/CacheFriendlyProduct.h"
|
#include "src/Core/CacheFriendlyProduct.h"
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace Eigen
|
} // namespace Eigen
|
||||||
|
|
||||||
|
@ -249,7 +249,6 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
|
|||||||
{
|
{
|
||||||
static void run(Derived1 &dst, const Derived2 &src)
|
static void run(Derived1 &dst, const Derived2 &src)
|
||||||
{
|
{
|
||||||
const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
|
|
||||||
const int innerSize = dst.innerSize();
|
const int innerSize = dst.innerSize();
|
||||||
const int outerSize = dst.outerSize();
|
const int outerSize = dst.outerSize();
|
||||||
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
||||||
|
@ -155,7 +155,6 @@ template<typename MatrixType, int BlockRows, int BlockCols> class Block
|
|||||||
return m_matrix.const_cast_derived()
|
return m_matrix.const_cast_derived()
|
||||||
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const Scalar coeff(int index) const
|
inline const Scalar coeff(int index) const
|
||||||
|
@ -25,6 +25,8 @@
|
|||||||
#ifndef EIGEN_CACHE_FRIENDLY_PRODUCT_H
|
#ifndef EIGEN_CACHE_FRIENDLY_PRODUCT_H
|
||||||
#define EIGEN_CACHE_FRIENDLY_PRODUCT_H
|
#define EIGEN_CACHE_FRIENDLY_PRODUCT_H
|
||||||
|
|
||||||
|
#ifndef EIGEN_EXTERN_INSTANTIATIONS
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
static void ei_cache_friendly_product(
|
static void ei_cache_friendly_product(
|
||||||
int _rows, int _cols, int depth,
|
int _rows, int _cols, int depth,
|
||||||
@ -77,8 +79,6 @@ static void ei_cache_friendly_product(
|
|||||||
MaxL2BlockSize = EIGEN_TUNE_FOR_L2_CACHE_SIZE / sizeof(Scalar)
|
MaxL2BlockSize = EIGEN_TUNE_FOR_L2_CACHE_SIZE / sizeof(Scalar)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
//const bool rhsIsAligned = (PacketSize==1) || (((rhsStride%PacketSize) == 0) && (size_t(rhs)%16==0));
|
|
||||||
const bool resIsAligned = (PacketSize==1) || (((resStride%PacketSize) == 0) && (size_t(res)%16==0));
|
const bool resIsAligned = (PacketSize==1) || (((resStride%PacketSize) == 0) && (size_t(res)%16==0));
|
||||||
|
|
||||||
const int remainingSize = depth % PacketSize;
|
const int remainingSize = depth % PacketSize;
|
||||||
@ -357,4 +357,165 @@ static void ei_cache_friendly_product(
|
|||||||
free(block);
|
free(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif // EIGEN_EXTERN_INSTANTIATIONS
|
||||||
|
|
||||||
|
/* Optimized col-major matrix * vector product:
|
||||||
|
* This algorithm processes 4 columns at onces that allows to both reduce
|
||||||
|
* the number of load/stores of the result by a factor 4 and to reduce
|
||||||
|
* the instruction dependency. Moreover, we know that all bands have the
|
||||||
|
* same alignment pattern.
|
||||||
|
* TODO: since rhs gets evaluated only once, no need to evaluate it
|
||||||
|
*/
|
||||||
|
template<typename Scalar, typename RhsType>
|
||||||
|
EIGEN_DONT_INLINE static void ei_cache_friendly_product(
|
||||||
|
int size,
|
||||||
|
const Scalar* lhs, int lhsStride,
|
||||||
|
const RhsType& rhs,
|
||||||
|
Scalar* res)
|
||||||
|
{
|
||||||
|
#ifdef _EIGEN_ACCUMULATE_PACKETS
|
||||||
|
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define _EIGEN_ACCUMULATE_PACKETS(A0,A13,A2,OFFSET) \
|
||||||
|
ei_pstore(&res[j OFFSET], \
|
||||||
|
ei_padd(ei_pload(&res[j OFFSET]), \
|
||||||
|
ei_padd( \
|
||||||
|
ei_padd(ei_pmul(ptmp0,ei_pload ## A0(&lhs[j OFFSET +iN0])),ei_pmul(ptmp1,ei_pload ## A13(&lhs[j OFFSET +iN1]))), \
|
||||||
|
ei_padd(ei_pmul(ptmp2,ei_pload ## A2(&lhs[j OFFSET +iN2])),ei_pmul(ptmp3,ei_pload ## A13(&lhs[j OFFSET +iN3]))) )))
|
||||||
|
|
||||||
|
asm("#begin matrix_vector_product");
|
||||||
|
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||||
|
const int PacketSize = sizeof(Packet)/sizeof(Scalar);
|
||||||
|
|
||||||
|
enum { AllAligned, EvenAligned, FirstAligned, NoneAligned };
|
||||||
|
const int columnsAtOnce = 4;
|
||||||
|
const int peels = 2;
|
||||||
|
const int PacketAlignedMask = PacketSize-1;
|
||||||
|
const int PeelAlignedMask = PacketSize*peels-1;
|
||||||
|
const bool Vectorized = sizeof(Packet) != sizeof(Scalar);
|
||||||
|
|
||||||
|
// How many coeffs of the result do we have to skip to be aligned.
|
||||||
|
// Here we assume data are at least aligned on the base scalar type that is mandatory anyway.
|
||||||
|
const int alignedStart = Vectorized
|
||||||
|
? std::min<int>( (PacketSize - ((size_t(res)/sizeof(Scalar)) & PacketAlignedMask)) & PacketAlignedMask, size)
|
||||||
|
: 0;
|
||||||
|
const int alignedSize = alignedStart + ((size-alignedStart) & ~PacketAlignedMask);
|
||||||
|
const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : 0;
|
||||||
|
|
||||||
|
const int alignmentStep = lhsStride % PacketSize;
|
||||||
|
int alignmentPattern = alignmentStep==0 ? AllAligned
|
||||||
|
: alignmentStep==2 ? EvenAligned
|
||||||
|
: FirstAligned;
|
||||||
|
|
||||||
|
// find how many column do we have to skip to be aligned with the result (if possible)
|
||||||
|
int skipColumns=0;
|
||||||
|
for (; skipColumns<PacketSize; ++skipColumns)
|
||||||
|
{
|
||||||
|
if (alignedStart == alignmentStep*skipColumns)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (skipColumns==PacketSize)
|
||||||
|
alignmentPattern = NoneAligned;
|
||||||
|
skipColumns = std::min(skipColumns,rhs.size());
|
||||||
|
if (alignmentPattern!=NoneAligned)
|
||||||
|
for (int i=0; i<skipColumns; i++)
|
||||||
|
{
|
||||||
|
Scalar tmp0 = rhs[i];
|
||||||
|
Packet ptmp0 = ei_pset1(tmp0);
|
||||||
|
int iN0 = i*lhsStride;
|
||||||
|
// process first unaligned result's coeffs
|
||||||
|
for (int j=0; j<alignedStart; j++)
|
||||||
|
res[j] += tmp0 * lhs[j+iN0];
|
||||||
|
// process aligned result's coeffs (we know the lhs columns are not aligned)
|
||||||
|
for (int j = alignedStart;j<alignedSize;j+=PacketSize)
|
||||||
|
ei_pstore(&res[j], ei_padd(ei_pmul(ptmp0,ei_ploadu(&lhs[j+iN0])),ei_pload(&res[j])));
|
||||||
|
// process remaining result's coeffs
|
||||||
|
for (int j=alignedSize; j<size; j++)
|
||||||
|
res[j] += tmp0 * lhs[j+iN0];
|
||||||
|
}
|
||||||
|
|
||||||
|
int columnBound = (rhs.size()/columnsAtOnce)*columnsAtOnce;
|
||||||
|
for (int i=0; i<columnBound; i+=columnsAtOnce)
|
||||||
|
{
|
||||||
|
Scalar tmp0 = rhs[i];
|
||||||
|
Packet ptmp0 = ei_pset1(tmp0);
|
||||||
|
Scalar tmp1 = rhs[i+1];
|
||||||
|
Packet ptmp1 = ei_pset1(tmp1);
|
||||||
|
Scalar tmp2 = rhs[i+2];
|
||||||
|
Packet ptmp2 = ei_pset1(tmp2);
|
||||||
|
Scalar tmp3 = rhs[i+3];
|
||||||
|
Packet ptmp3 = ei_pset1(tmp3);
|
||||||
|
int iN0 = i*lhsStride;
|
||||||
|
int iN1 = (i+1)*lhsStride;
|
||||||
|
int iN2 = (i+2)*lhsStride;
|
||||||
|
int iN3 = (i+3)*lhsStride;
|
||||||
|
|
||||||
|
// process initial unaligned coeffs
|
||||||
|
for (int j=0; j<alignedStart; j++)
|
||||||
|
res[j] += tmp0 * lhs[j+iN0] + tmp1 * lhs[j+iN1] + tmp2 * lhs[j+iN2] + tmp3 * lhs[j+iN3];
|
||||||
|
|
||||||
|
if (alignedSize>0)
|
||||||
|
{
|
||||||
|
switch(alignmentPattern)
|
||||||
|
{
|
||||||
|
case AllAligned:
|
||||||
|
for (int j = alignedStart; j<alignedSize; j+=PacketSize)
|
||||||
|
_EIGEN_ACCUMULATE_PACKETS(,,,);
|
||||||
|
break;
|
||||||
|
case EvenAligned:
|
||||||
|
for (int j = alignedStart; j<alignedSize; j+=PacketSize)
|
||||||
|
_EIGEN_ACCUMULATE_PACKETS(,u,,);
|
||||||
|
break;
|
||||||
|
case FirstAligned:
|
||||||
|
if (peels>1)
|
||||||
|
for (int j = alignedStart; j<peeledSize; j+=peels*PacketSize)
|
||||||
|
{
|
||||||
|
_EIGEN_ACCUMULATE_PACKETS(,u,u,);
|
||||||
|
_EIGEN_ACCUMULATE_PACKETS(,u,u,+PacketSize);
|
||||||
|
if (peels>2) _EIGEN_ACCUMULATE_PACKETS(,u,u,+2*PacketSize);
|
||||||
|
if (peels>3) _EIGEN_ACCUMULATE_PACKETS(,u,u,+3*PacketSize);
|
||||||
|
if (peels>4) _EIGEN_ACCUMULATE_PACKETS(,u,u,+4*PacketSize);
|
||||||
|
if (peels>5) _EIGEN_ACCUMULATE_PACKETS(,u,u,+5*PacketSize);
|
||||||
|
if (peels>6) _EIGEN_ACCUMULATE_PACKETS(,u,u,+6*PacketSize);
|
||||||
|
if (peels>7) _EIGEN_ACCUMULATE_PACKETS(,u,u,+7*PacketSize);
|
||||||
|
}
|
||||||
|
for (int j = peeledSize; j<alignedSize; j+=PacketSize)
|
||||||
|
_EIGEN_ACCUMULATE_PACKETS(,u,u,);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
for (int j = peeledSize; j<alignedSize; j+=PacketSize)
|
||||||
|
_EIGEN_ACCUMULATE_PACKETS(u,u,u,);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// process remaining coeffs
|
||||||
|
for (int j=alignedSize; j<size; j++)
|
||||||
|
res[j] += tmp0 * lhs[j+iN0] + tmp1 * lhs[j+iN1] + tmp2 * lhs[j+iN2] + tmp3 * lhs[j+iN3];
|
||||||
|
}
|
||||||
|
for (int i=columnBound; i<rhs.size(); i++)
|
||||||
|
{
|
||||||
|
Scalar tmp0 = rhs[i];
|
||||||
|
Packet ptmp0 = ei_pset1(tmp0);
|
||||||
|
int iN0 = i*lhsStride;
|
||||||
|
if (alignedSize>0)
|
||||||
|
{
|
||||||
|
bool aligned0 = (iN0 % PacketSize) == 0;
|
||||||
|
if (aligned0)
|
||||||
|
for (int j = 0;j<alignedSize;j+=PacketSize)
|
||||||
|
ei_pstore(&res[j], ei_padd(ei_pmul(ptmp0,ei_pload(&lhs[j+iN0])),ei_pload(&res[j])));
|
||||||
|
else
|
||||||
|
for (int j = 0;j<alignedSize;j+=PacketSize)
|
||||||
|
ei_pstore(&res[j], ei_padd(ei_pmul(ptmp0,ei_ploadu(&lhs[j+iN0])),ei_pload(&res[j])));
|
||||||
|
}
|
||||||
|
// process remaining scalars
|
||||||
|
for (int j=alignedSize; j<size; j++)
|
||||||
|
res[j] += tmp0 * lhs[j+iN0];
|
||||||
|
}
|
||||||
|
asm("#end matrix_vector_product");
|
||||||
|
|
||||||
|
#undef _EIGEN_ACCUMULATE_PACKETS
|
||||||
|
}
|
||||||
|
|
||||||
#endif // EIGEN_CACHE_FRIENDLY_PRODUCT_H
|
#endif // EIGEN_CACHE_FRIENDLY_PRODUCT_H
|
||||||
|
@ -250,8 +250,8 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
const _LhsNested& lhs() const { return m_lhs; }
|
inline const _LhsNested& lhs() const { return m_lhs; }
|
||||||
const _RhsNested& rhs() const { return m_rhs; }
|
inline const _RhsNested& rhs() const { return m_rhs; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const LhsNested m_lhs;
|
const LhsNested m_lhs;
|
||||||
@ -480,11 +480,22 @@ struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMod
|
|||||||
* Cache friendly product callers and specific nested evaluation strategies
|
* Cache friendly product callers and specific nested evaluation strategies
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
template<typename Scalar, typename RhsType>
|
||||||
|
static void ei_cache_friendly_product(
|
||||||
|
int size, const Scalar* lhs, int lhsStride, const RhsType& rhs, Scalar* res);
|
||||||
|
|
||||||
|
enum {
|
||||||
|
HasDirectAccess,
|
||||||
|
NoDirectAccess
|
||||||
|
};
|
||||||
|
|
||||||
template<typename ProductType,
|
template<typename ProductType,
|
||||||
int LhsRows = ei_traits<ProductType>::RowsAtCompileTime,
|
int LhsRows = ei_traits<ProductType>::RowsAtCompileTime,
|
||||||
int LhsOrder = int(ei_traits<ProductType>::LhsFlags)&RowMajorBit ? RowMajor : ColMajor,
|
int LhsOrder = int(ei_traits<ProductType>::LhsFlags)&RowMajorBit ? RowMajor : ColMajor,
|
||||||
|
int LhsHasDirectAccess = int(ei_traits<ProductType>::LhsFlags)&DirectAccessBit? HasDirectAccess : NoDirectAccess,
|
||||||
int RhsCols = ei_traits<ProductType>::ColsAtCompileTime,
|
int RhsCols = ei_traits<ProductType>::ColsAtCompileTime,
|
||||||
int RhsOrder = int(ei_traits<ProductType>::RhsFlags)&RowMajorBit ? RowMajor : ColMajor>
|
int RhsOrder = int(ei_traits<ProductType>::RhsFlags)&RowMajorBit ? RowMajor : ColMajor,
|
||||||
|
int RhsHasDirectAccess = int(ei_traits<ProductType>::RhsFlags)&DirectAccessBit? HasDirectAccess : NoDirectAccess>
|
||||||
struct ei_cache_friendly_product_selector
|
struct ei_cache_friendly_product_selector
|
||||||
{
|
{
|
||||||
template<typename DestDerived>
|
template<typename DestDerived>
|
||||||
@ -495,21 +506,57 @@ struct ei_cache_friendly_product_selector
|
|||||||
};
|
};
|
||||||
|
|
||||||
// optimized colmajor * vector path
|
// optimized colmajor * vector path
|
||||||
template<typename ProductType, int LhsRows, int RhsOrder>
|
template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess>
|
||||||
struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,1,RhsOrder>
|
struct ei_cache_friendly_product_selector<ProductType,LhsRows,NoDirectAccess,ColMajor,1,RhsOrder,RhsAccess>
|
||||||
{
|
{
|
||||||
|
typedef typename ei_traits<ProductType>::_LhsNested Lhs;
|
||||||
template<typename DestDerived>
|
template<typename DestDerived>
|
||||||
inline static void run(DestDerived& res, const ProductType& product)
|
inline static void run(DestDerived& res, const ProductType& product)
|
||||||
{
|
{
|
||||||
const int rows = product.rhs().rows();
|
ei_scalar_sum_op<typename ProductType::Scalar> _sum;
|
||||||
for (int j=0; j<rows; ++j)
|
const int size = product.rhs().rows();
|
||||||
res += product.rhs().coeff(j) * product.lhs().col(j);
|
for (int k=0; k<size; ++k)
|
||||||
|
if (Lhs::Flags&DirectAccessBit)
|
||||||
|
// TODO to properly hanlde this workaround let's specialize Block for type having the DirectAccessBit
|
||||||
|
res += product.rhs().coeff(k) * Map<DestDerived>(&product.lhs().const_cast_derived().coeffRef(0,k),product.lhs().rows());
|
||||||
|
else
|
||||||
|
res += product.rhs().coeff(k) * product.lhs().col(k);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// optimized cache friendly colmajor * vector path for matrix with direct access flag
|
||||||
|
// NOTE this path coul also be enabled for expressions if we add runtime align queries
|
||||||
|
template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess>
|
||||||
|
struct ei_cache_friendly_product_selector<ProductType,LhsRows,HasDirectAccess,ColMajor,1,RhsOrder,RhsAccess>
|
||||||
|
{
|
||||||
|
typedef typename ProductType::Scalar Scalar;
|
||||||
|
|
||||||
|
template<typename DestDerived>
|
||||||
|
inline static void run(DestDerived& res, const ProductType& product)
|
||||||
|
{
|
||||||
|
enum {
|
||||||
|
EvalToRes = (ei_packet_traits<Scalar>::size==1)
|
||||||
|
||(DestDerived::Flags&ActualPacketAccessBit) && (!(DestDerived::Flags & RowMajorBit)) };
|
||||||
|
Scalar* __restrict__ _res;
|
||||||
|
if (EvalToRes)
|
||||||
|
_res = &res.coeffRef(0);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_res = (Scalar*)alloca(sizeof(Scalar)*res.size());
|
||||||
|
Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
|
||||||
|
}
|
||||||
|
ei_cache_friendly_product(res.size(),
|
||||||
|
&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
|
||||||
|
product.rhs(), _res);
|
||||||
|
|
||||||
|
if (!EvalToRes)
|
||||||
|
res = Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// optimized vector * rowmajor path
|
// optimized vector * rowmajor path
|
||||||
template<typename ProductType, int LhsOrder, int RhsCols>
|
template<typename ProductType, int LhsOrder, int LhsAccess, int RhsCols>
|
||||||
struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,RhsCols,RowMajor>
|
struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCols,RowMajor,NoDirectAccess>
|
||||||
{
|
{
|
||||||
template<typename DestDerived>
|
template<typename DestDerived>
|
||||||
inline static void run(DestDerived& res, const ProductType& product)
|
inline static void run(DestDerived& res, const ProductType& product)
|
||||||
@ -520,6 +567,36 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,RhsCols,RowMajo
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// optimized cache friendly vector * rowmajor path for matrix with direct access flag
|
||||||
|
// NOTE this path coul also be enabled for expressions if we add runtime align queries
|
||||||
|
template<typename ProductType, int LhsOrder, int LhsAccess, int RhsCols>
|
||||||
|
struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCols,RowMajor,HasDirectAccess>
|
||||||
|
{
|
||||||
|
typedef typename ProductType::Scalar Scalar;
|
||||||
|
|
||||||
|
template<typename DestDerived>
|
||||||
|
inline static void run(DestDerived& res, const ProductType& product)
|
||||||
|
{
|
||||||
|
enum {
|
||||||
|
EvalToRes = (ei_packet_traits<Scalar>::size==1)
|
||||||
|
||(DestDerived::Flags & ActualPacketAccessBit) && (DestDerived::Flags & RowMajorBit) };
|
||||||
|
Scalar* __restrict__ _res;
|
||||||
|
if (EvalToRes)
|
||||||
|
_res = &res.coeffRef(0);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_res = (Scalar*)alloca(sizeof(Scalar)*res.size());
|
||||||
|
Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
|
||||||
|
}
|
||||||
|
ei_cache_friendly_product(res.size(),
|
||||||
|
&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
|
||||||
|
product.lhs().transpose(), _res);
|
||||||
|
|
||||||
|
if (!EvalToRes)
|
||||||
|
res = Map<Matrix<Scalar,1,DestDerived::ColsAtCompileTime> >(_res, res.size());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/** \internal */
|
/** \internal */
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename Lhs,typename Rhs>
|
template<typename Lhs,typename Rhs>
|
||||||
|
@ -39,9 +39,13 @@ template<typename MatrixType> void product(const MatrixType& m)
|
|||||||
|
|
||||||
typedef typename MatrixType::Scalar Scalar;
|
typedef typename MatrixType::Scalar Scalar;
|
||||||
typedef typename NumTraits<Scalar>::FloatingPoint FloatingPoint;
|
typedef typename NumTraits<Scalar>::FloatingPoint FloatingPoint;
|
||||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
|
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> RowVectorType;
|
||||||
|
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> ColVectorType;
|
||||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> RowSquareMatrixType;
|
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> RowSquareMatrixType;
|
||||||
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> ColSquareMatrixType;
|
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> ColSquareMatrixType;
|
||||||
|
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime,
|
||||||
|
MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime,
|
||||||
|
MatrixType::Flags&RowMajorBit ? 0 : RowMajorBit> OtherMajorMatrixType;
|
||||||
|
|
||||||
int rows = m.rows();
|
int rows = m.rows();
|
||||||
int cols = m.cols();
|
int cols = m.cols();
|
||||||
@ -59,9 +63,11 @@ template<typename MatrixType> void product(const MatrixType& m)
|
|||||||
ColSquareMatrixType
|
ColSquareMatrixType
|
||||||
square2 = ColSquareMatrixType::random(cols, cols),
|
square2 = ColSquareMatrixType::random(cols, cols),
|
||||||
res2 = ColSquareMatrixType::random(cols, cols);
|
res2 = ColSquareMatrixType::random(cols, cols);
|
||||||
VectorType v1 = VectorType::random(rows),
|
RowVectorType v1 = RowVectorType::random(rows),
|
||||||
v2 = VectorType::random(rows),
|
v2 = RowVectorType::random(rows),
|
||||||
vzero = VectorType::zero(rows);
|
vzero = RowVectorType::zero(rows);
|
||||||
|
ColVectorType vc2 = ColVectorType::random(cols), vcres;
|
||||||
|
OtherMajorMatrixType tm1 = m1;
|
||||||
|
|
||||||
Scalar s1 = ei_random<Scalar>();
|
Scalar s1 = ei_random<Scalar>();
|
||||||
|
|
||||||
@ -89,6 +95,7 @@ template<typename MatrixType> void product(const MatrixType& m)
|
|||||||
|
|
||||||
// test Product.h together with Identity.h
|
// test Product.h together with Identity.h
|
||||||
VERIFY_IS_APPROX(v1, identity*v1);
|
VERIFY_IS_APPROX(v1, identity*v1);
|
||||||
|
VERIFY_IS_APPROX(v1.transpose(), v1.transpose() * identity);
|
||||||
// again, test operator() to check const-qualification
|
// again, test operator() to check const-qualification
|
||||||
VERIFY_IS_APPROX(MatrixType::identity(rows, cols)(r,c), static_cast<Scalar>(r==c));
|
VERIFY_IS_APPROX(MatrixType::identity(rows, cols)(r,c), static_cast<Scalar>(r==c));
|
||||||
|
|
||||||
@ -110,6 +117,21 @@ template<typename MatrixType> void product(const MatrixType& m)
|
|||||||
{
|
{
|
||||||
VERIFY(areNotApprox(res,square + m2 * m1.transpose()));
|
VERIFY(areNotApprox(res,square + m2 * m1.transpose()));
|
||||||
}
|
}
|
||||||
|
vcres = vc2;
|
||||||
|
vcres += (m1.transpose() * v1).lazy();
|
||||||
|
VERIFY_IS_APPROX(vcres, vc2 + m1.transpose() * v1);
|
||||||
|
tm1 = m1;
|
||||||
|
VERIFY_IS_APPROX(tm1.transpose() * v1, m1.transpose() * v1);
|
||||||
|
VERIFY_IS_APPROX(v1.transpose() * tm1, v1.transpose() * m1);
|
||||||
|
|
||||||
|
// test submatrix and matrix/vector product
|
||||||
|
for (int i=0; i<rows; ++i)
|
||||||
|
res.row(i) = m1.row(i) * m2.transpose();
|
||||||
|
VERIFY_IS_APPROX(res, m1 * m2.transpose());
|
||||||
|
// the other way round:
|
||||||
|
for (int i=0; i<rows; ++i)
|
||||||
|
res.col(i) = m1 * m2.transpose().col(i);
|
||||||
|
VERIFY_IS_APPROX(res, m1 * m2.transpose());
|
||||||
|
|
||||||
res2 = square2;
|
res2 = square2;
|
||||||
res2 += (m1.transpose() * m2).lazy();
|
res2 += (m1.transpose() * m2).lazy();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user