mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 19:29:02 +08:00
* Optimization: added a specialization of Block for xpr with DirectAccessBit
* some simplifications and fixes in cache friendly products
This commit is contained in:
parent
1bbaea9885
commit
861d18d553
@ -56,8 +56,8 @@
|
|||||||
*
|
*
|
||||||
* \sa MatrixBase::block(int,int,int,int), MatrixBase::block(int,int), class VectorBlock
|
* \sa MatrixBase::block(int,int,int,int), MatrixBase::block(int,int), class VectorBlock
|
||||||
*/
|
*/
|
||||||
template<typename MatrixType, int BlockRows, int BlockCols>
|
template<typename MatrixType, int BlockRows, int BlockCols, int DirectAccesStatus>
|
||||||
struct ei_traits<Block<MatrixType, BlockRows, BlockCols> >
|
struct ei_traits<Block<MatrixType, BlockRows, BlockCols, DirectAccesStatus> >
|
||||||
{
|
{
|
||||||
typedef typename MatrixType::Scalar Scalar;
|
typedef typename MatrixType::Scalar Scalar;
|
||||||
enum{
|
enum{
|
||||||
@ -83,8 +83,8 @@ struct ei_traits<Block<MatrixType, BlockRows, BlockCols> >
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename MatrixType, int BlockRows, int BlockCols> class Block
|
template<typename MatrixType, int BlockRows, int BlockCols, int DirectAccesStatus> class Block
|
||||||
: public MatrixBase<Block<MatrixType, BlockRows, BlockCols> >
|
: public MatrixBase<Block<MatrixType, BlockRows, BlockCols, DirectAccesStatus> >
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
@ -203,6 +203,137 @@ template<typename MatrixType, int BlockRows, int BlockCols> class Block
|
|||||||
const ei_int_if_dynamic<ColsAtCompileTime> m_blockCols;
|
const ei_int_if_dynamic<ColsAtCompileTime> m_blockCols;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** \internal */
|
||||||
|
template<typename MatrixType, int BlockRows, int BlockCols> class Block<MatrixType,BlockRows,BlockCols,HasDirectAccess>
|
||||||
|
: public MatrixBase<Block<MatrixType, BlockRows, BlockCols,HasDirectAccess> >
|
||||||
|
{
|
||||||
|
enum {
|
||||||
|
IsRowMajor = int(ei_traits<MatrixType>::Flags)&RowMajorBit ? 1 : 0
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
|
||||||
|
|
||||||
|
/** Column or Row constructor
|
||||||
|
*/
|
||||||
|
inline Block(const MatrixType& matrix, int i)
|
||||||
|
: m_matrix(matrix),
|
||||||
|
m_data_ptr(&matrix.const_cast_derived().coeffRef(
|
||||||
|
(BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) ? i : 0,
|
||||||
|
(BlockRows==MatrixType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)),
|
||||||
|
m_blockRows(matrix.rows()),
|
||||||
|
m_blockCols(matrix.cols())
|
||||||
|
{
|
||||||
|
ei_assert( (i>=0) && (
|
||||||
|
((BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) && i<matrix.rows())
|
||||||
|
||((BlockRows==MatrixType::RowsAtCompileTime) && (BlockCols==1) && i<matrix.cols())));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixed-size constructor
|
||||||
|
*/
|
||||||
|
inline Block(const MatrixType& matrix, int startRow, int startCol)
|
||||||
|
: m_matrix(matrix), m_data_ptr(&matrix.const_cast_derived().coeffRef(startRow,startCol))
|
||||||
|
{
|
||||||
|
ei_assert(RowsAtCompileTime!=Dynamic && RowsAtCompileTime!=Dynamic);
|
||||||
|
ei_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= matrix.rows()
|
||||||
|
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= matrix.cols());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Dynamic-size constructor
|
||||||
|
*/
|
||||||
|
inline Block(const MatrixType& matrix,
|
||||||
|
int startRow, int startCol,
|
||||||
|
int blockRows, int blockCols)
|
||||||
|
: m_matrix(matrix), m_data_ptr(&matrix.const_cast_derived().coeffRef(startRow,startCol)),
|
||||||
|
m_blockRows(blockRows), m_blockCols(blockCols)
|
||||||
|
{
|
||||||
|
ei_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
|
||||||
|
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
|
||||||
|
ei_assert(startRow >= 0 && blockRows >= 1 && startRow + blockRows <= matrix.rows()
|
||||||
|
&& startCol >= 0 && blockCols >= 1 && startCol + blockCols <= matrix.cols());
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
|
||||||
|
|
||||||
|
inline int rows() const { return m_blockRows.value(); }
|
||||||
|
inline int cols() const { return m_blockCols.value(); }
|
||||||
|
|
||||||
|
inline int stride(void) const { return m_matrix.stride(); }
|
||||||
|
|
||||||
|
inline Scalar& coeffRef(int row, int col)
|
||||||
|
{
|
||||||
|
if (IsRowMajor)
|
||||||
|
return m_data_ptr[col + row * stride()];
|
||||||
|
else
|
||||||
|
return m_data_ptr[row + col * stride()];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline const Scalar coeff(int row, int col) const
|
||||||
|
{
|
||||||
|
// std::cerr << "coeff(int row, int col)\n";
|
||||||
|
if (IsRowMajor)
|
||||||
|
return m_data_ptr[col + row * stride()];
|
||||||
|
else
|
||||||
|
return m_data_ptr[row + col * stride()];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Scalar& coeffRef(int index)
|
||||||
|
{
|
||||||
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Block);
|
||||||
|
return m_data_ptr[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline const Scalar coeff(int index) const
|
||||||
|
{
|
||||||
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Block);
|
||||||
|
if ( (RowsAtCompileTime == 1) == IsRowMajor )
|
||||||
|
return m_data_ptr[index];
|
||||||
|
else
|
||||||
|
return m_data_ptr[index*stride()];
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int LoadMode>
|
||||||
|
inline PacketScalar packet(int row, int col) const
|
||||||
|
{
|
||||||
|
if (IsRowMajor)
|
||||||
|
return ei_ploadu(&m_data_ptr[col + row * stride()]);
|
||||||
|
else
|
||||||
|
return ei_ploadu(&m_data_ptr[row + col * stride()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int LoadMode>
|
||||||
|
inline void writePacket(int row, int col, const PacketScalar& x)
|
||||||
|
{
|
||||||
|
if (IsRowMajor)
|
||||||
|
ei_pstoreu(&m_data_ptr[col + row * stride()], x);
|
||||||
|
else
|
||||||
|
ei_pstoreu(&m_data_ptr[row + col * stride()], x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int LoadMode>
|
||||||
|
inline PacketScalar packet(int index) const
|
||||||
|
{
|
||||||
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Block);
|
||||||
|
return ei_ploadu(&m_data_ptr[index]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int LoadMode>
|
||||||
|
inline void writePacket(int index, const PacketScalar& x)
|
||||||
|
{
|
||||||
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Block);
|
||||||
|
ei_pstoreu(&m_data_ptr[index], x);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
const typename MatrixType::Nested m_matrix;
|
||||||
|
Scalar* m_data_ptr;
|
||||||
|
const ei_int_if_dynamic<RowsAtCompileTime> m_blockRows;
|
||||||
|
const ei_int_if_dynamic<ColsAtCompileTime> m_blockCols;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/** \returns a dynamic-size expression of a block in *this.
|
/** \returns a dynamic-size expression of a block in *this.
|
||||||
*
|
*
|
||||||
* \param startRow the first row in the block
|
* \param startRow the first row in the block
|
||||||
|
@ -367,7 +367,7 @@ static void ei_cache_friendly_product(
|
|||||||
* TODO: since rhs gets evaluated only once, no need to evaluate it
|
* TODO: since rhs gets evaluated only once, no need to evaluate it
|
||||||
*/
|
*/
|
||||||
template<typename Scalar, typename RhsType>
|
template<typename Scalar, typename RhsType>
|
||||||
EIGEN_DONT_INLINE static void ei_cache_friendly_product(
|
EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector(
|
||||||
int size,
|
int size,
|
||||||
const Scalar* lhs, int lhsStride,
|
const Scalar* lhs, int lhsStride,
|
||||||
const RhsType& rhs,
|
const RhsType& rhs,
|
||||||
@ -408,54 +408,34 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product(
|
|||||||
: alignmentStep==2 ? EvenAligned
|
: alignmentStep==2 ? EvenAligned
|
||||||
: FirstAligned;
|
: FirstAligned;
|
||||||
|
|
||||||
// find how many column do we have to skip to be aligned with the result (if possible)
|
// find how many columns do we have to skip to be aligned with the result (if possible)
|
||||||
int skipColumns=0;
|
int skipColumns=0;
|
||||||
for (; skipColumns<PacketSize; ++skipColumns)
|
for (; skipColumns<PacketSize && alignedStart != alignmentStep*skipColumns; ++skipColumns)
|
||||||
{
|
{}
|
||||||
if (alignedStart == alignmentStep*skipColumns)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (skipColumns==PacketSize)
|
if (skipColumns==PacketSize)
|
||||||
|
{
|
||||||
|
// nothing can be aligned, no need to skip any column
|
||||||
alignmentPattern = NoneAligned;
|
alignmentPattern = NoneAligned;
|
||||||
skipColumns = std::min(skipColumns,rhs.size());
|
skipColumns = 0;
|
||||||
if (alignmentPattern!=NoneAligned)
|
}
|
||||||
for (int i=0; i<skipColumns; i++)
|
else
|
||||||
{
|
{
|
||||||
Scalar tmp0 = rhs[i];
|
skipColumns = std::min(skipColumns,rhs.size());
|
||||||
Packet ptmp0 = ei_pset1(tmp0);
|
// note that the skiped columns are processed later.
|
||||||
int iN0 = i*lhsStride;
|
}
|
||||||
// process first unaligned result's coeffs
|
|
||||||
for (int j=0; j<alignedStart; j++)
|
|
||||||
res[j] += tmp0 * lhs[j+iN0];
|
|
||||||
// process aligned result's coeffs (we know the lhs columns are not aligned)
|
|
||||||
for (int j = alignedStart;j<alignedSize;j+=PacketSize)
|
|
||||||
ei_pstore(&res[j], ei_padd(ei_pmul(ptmp0,ei_ploadu(&lhs[j+iN0])),ei_pload(&res[j])));
|
|
||||||
// process remaining result's coeffs
|
|
||||||
for (int j=alignedSize; j<size; j++)
|
|
||||||
res[j] += tmp0 * lhs[j+iN0];
|
|
||||||
}
|
|
||||||
|
|
||||||
int columnBound = (rhs.size()/columnsAtOnce)*columnsAtOnce;
|
int columnBound = (rhs.size()/columnsAtOnce)*columnsAtOnce;
|
||||||
for (int i=0; i<columnBound; i+=columnsAtOnce)
|
for (int i=skipColumns; i<columnBound; i+=columnsAtOnce)
|
||||||
{
|
{
|
||||||
Scalar tmp0 = rhs[i];
|
Scalar tmp0 = rhs[i], tmp1 = rhs[i+1], tmp2 = rhs[i+2], tmp3 = rhs[i+3];
|
||||||
Packet ptmp0 = ei_pset1(tmp0);
|
Packet ptmp0 = ei_pset1(tmp0), ptmp1 = ei_pset1(tmp1), ptmp2 = ei_pset1(tmp2), ptmp3 = ei_pset1(tmp3);
|
||||||
Scalar tmp1 = rhs[i+1];
|
int iN0 = i*lhsStride, iN1 = (i+1)*lhsStride, iN2 = (i+2)*lhsStride, iN3 = (i+3)*lhsStride;
|
||||||
Packet ptmp1 = ei_pset1(tmp1);
|
|
||||||
Scalar tmp2 = rhs[i+2];
|
|
||||||
Packet ptmp2 = ei_pset1(tmp2);
|
|
||||||
Scalar tmp3 = rhs[i+3];
|
|
||||||
Packet ptmp3 = ei_pset1(tmp3);
|
|
||||||
int iN0 = i*lhsStride;
|
|
||||||
int iN1 = (i+1)*lhsStride;
|
|
||||||
int iN2 = (i+2)*lhsStride;
|
|
||||||
int iN3 = (i+3)*lhsStride;
|
|
||||||
|
|
||||||
// process initial unaligned coeffs
|
// process initial unaligned coeffs
|
||||||
for (int j=0; j<alignedStart; j++)
|
for (int j=0; j<alignedStart; j++)
|
||||||
res[j] += tmp0 * lhs[j+iN0] + tmp1 * lhs[j+iN1] + tmp2 * lhs[j+iN2] + tmp3 * lhs[j+iN3];
|
res[j] += tmp0 * lhs[j+iN0] + tmp1 * lhs[j+iN1] + tmp2 * lhs[j+iN2] + tmp3 * lhs[j+iN3];
|
||||||
|
|
||||||
if (alignedSize>0)
|
if (alignedSize>alignedStart)
|
||||||
{
|
{
|
||||||
switch(alignmentPattern)
|
switch(alignmentPattern)
|
||||||
{
|
{
|
||||||
@ -475,10 +455,6 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product(
|
|||||||
_EIGEN_ACCUMULATE_PACKETS(,u,u,+PacketSize);
|
_EIGEN_ACCUMULATE_PACKETS(,u,u,+PacketSize);
|
||||||
if (peels>2) _EIGEN_ACCUMULATE_PACKETS(,u,u,+2*PacketSize);
|
if (peels>2) _EIGEN_ACCUMULATE_PACKETS(,u,u,+2*PacketSize);
|
||||||
if (peels>3) _EIGEN_ACCUMULATE_PACKETS(,u,u,+3*PacketSize);
|
if (peels>3) _EIGEN_ACCUMULATE_PACKETS(,u,u,+3*PacketSize);
|
||||||
if (peels>4) _EIGEN_ACCUMULATE_PACKETS(,u,u,+4*PacketSize);
|
|
||||||
if (peels>5) _EIGEN_ACCUMULATE_PACKETS(,u,u,+5*PacketSize);
|
|
||||||
if (peels>6) _EIGEN_ACCUMULATE_PACKETS(,u,u,+6*PacketSize);
|
|
||||||
if (peels>7) _EIGEN_ACCUMULATE_PACKETS(,u,u,+7*PacketSize);
|
|
||||||
}
|
}
|
||||||
for (int j = peeledSize; j<alignedSize; j+=PacketSize)
|
for (int j = peeledSize; j<alignedSize; j+=PacketSize)
|
||||||
_EIGEN_ACCUMULATE_PACKETS(,u,u,);
|
_EIGEN_ACCUMULATE_PACKETS(,u,u,);
|
||||||
@ -494,25 +470,42 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product(
|
|||||||
for (int j=alignedSize; j<size; j++)
|
for (int j=alignedSize; j<size; j++)
|
||||||
res[j] += tmp0 * lhs[j+iN0] + tmp1 * lhs[j+iN1] + tmp2 * lhs[j+iN2] + tmp3 * lhs[j+iN3];
|
res[j] += tmp0 * lhs[j+iN0] + tmp1 * lhs[j+iN1] + tmp2 * lhs[j+iN2] + tmp3 * lhs[j+iN3];
|
||||||
}
|
}
|
||||||
for (int i=columnBound; i<rhs.size(); i++)
|
|
||||||
|
// process remaining first and last columns (at most columnsAtOnce-1)
|
||||||
|
int end = rhs.size();
|
||||||
|
int start = columnBound;
|
||||||
|
do
|
||||||
{
|
{
|
||||||
Scalar tmp0 = rhs[i];
|
for (int i=columnBound; i<end; i++)
|
||||||
Packet ptmp0 = ei_pset1(tmp0);
|
|
||||||
int iN0 = i*lhsStride;
|
|
||||||
if (alignedSize>0)
|
|
||||||
{
|
{
|
||||||
bool aligned0 = (iN0 % PacketSize) == 0;
|
Scalar tmp0 = rhs[i];
|
||||||
if (aligned0)
|
Packet ptmp0 = ei_pset1(tmp0);
|
||||||
|
int iN0 = i*lhsStride;
|
||||||
|
// process first unaligned result's coeffs
|
||||||
|
for (int j=0; j<alignedStart; j++)
|
||||||
|
res[j] += tmp0 * lhs[j+iN0];
|
||||||
|
|
||||||
|
// process aligned result's coeffs
|
||||||
|
if ((iN0 % PacketSize) == 0)
|
||||||
for (int j = 0;j<alignedSize;j+=PacketSize)
|
for (int j = 0;j<alignedSize;j+=PacketSize)
|
||||||
ei_pstore(&res[j], ei_padd(ei_pmul(ptmp0,ei_pload(&lhs[j+iN0])),ei_pload(&res[j])));
|
ei_pstore(&res[j], ei_padd(ei_pmul(ptmp0,ei_pload(&lhs[j+iN0])),ei_pload(&res[j])));
|
||||||
else
|
else
|
||||||
for (int j = 0;j<alignedSize;j+=PacketSize)
|
for (int j = 0;j<alignedSize;j+=PacketSize)
|
||||||
ei_pstore(&res[j], ei_padd(ei_pmul(ptmp0,ei_ploadu(&lhs[j+iN0])),ei_pload(&res[j])));
|
ei_pstore(&res[j], ei_padd(ei_pmul(ptmp0,ei_ploadu(&lhs[j+iN0])),ei_pload(&res[j])));
|
||||||
|
|
||||||
|
// process remaining scalars
|
||||||
|
for (int j=alignedSize; j<size; j++)
|
||||||
|
res[j] += tmp0 * lhs[j+iN0];
|
||||||
}
|
}
|
||||||
// process remaining scalars
|
if (skipColumns)
|
||||||
for (int j=alignedSize; j<size; j++)
|
{
|
||||||
res[j] += tmp0 * lhs[j+iN0];
|
start = 0;
|
||||||
}
|
end = skipColumns;
|
||||||
|
skipColumns = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
} while(true);
|
||||||
asm("#end matrix_vector_product");
|
asm("#end matrix_vector_product");
|
||||||
|
|
||||||
#undef _EIGEN_ACCUMULATE_PACKETS
|
#undef _EIGEN_ACCUMULATE_PACKETS
|
||||||
|
@ -365,8 +365,7 @@ struct ei_product_coeff_impl<InnerVectorization, Index, Lhs, Rhs>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// FIXME the following is a hack to get very high perf with matrix-vector product,
|
// NOTE the following specializations are because taking .col(0) on a vector is a bit slower
|
||||||
// however, it would be preferable to switch for more general dynamic alignment queries
|
|
||||||
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
|
template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
|
||||||
struct ei_product_coeff_vectorized_dyn_selector
|
struct ei_product_coeff_vectorized_dyn_selector
|
||||||
{
|
{
|
||||||
@ -481,14 +480,9 @@ struct ei_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, PacketScalar, LoadMod
|
|||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
template<typename Scalar, typename RhsType>
|
template<typename Scalar, typename RhsType>
|
||||||
static void ei_cache_friendly_product(
|
static void ei_cache_friendly_product_colmajor_times_vector(
|
||||||
int size, const Scalar* lhs, int lhsStride, const RhsType& rhs, Scalar* res);
|
int size, const Scalar* lhs, int lhsStride, const RhsType& rhs, Scalar* res);
|
||||||
|
|
||||||
enum {
|
|
||||||
HasDirectAccess,
|
|
||||||
NoDirectAccess
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename ProductType,
|
template<typename ProductType,
|
||||||
int LhsRows = ei_traits<ProductType>::RowsAtCompileTime,
|
int LhsRows = ei_traits<ProductType>::RowsAtCompileTime,
|
||||||
int LhsOrder = int(ei_traits<ProductType>::LhsFlags)&RowMajorBit ? RowMajor : ColMajor,
|
int LhsOrder = int(ei_traits<ProductType>::LhsFlags)&RowMajorBit ? RowMajor : ColMajor,
|
||||||
@ -507,19 +501,13 @@ struct ei_cache_friendly_product_selector
|
|||||||
|
|
||||||
// optimized colmajor * vector path
|
// optimized colmajor * vector path
|
||||||
template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess>
|
template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess>
|
||||||
struct ei_cache_friendly_product_selector<ProductType,LhsRows,NoDirectAccess,ColMajor,1,RhsOrder,RhsAccess>
|
struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,NoDirectAccess,1,RhsOrder,RhsAccess>
|
||||||
{
|
{
|
||||||
typedef typename ei_traits<ProductType>::_LhsNested Lhs;
|
|
||||||
template<typename DestDerived>
|
template<typename DestDerived>
|
||||||
inline static void run(DestDerived& res, const ProductType& product)
|
inline static void run(DestDerived& res, const ProductType& product)
|
||||||
{
|
{
|
||||||
ei_scalar_sum_op<typename ProductType::Scalar> _sum;
|
|
||||||
const int size = product.rhs().rows();
|
const int size = product.rhs().rows();
|
||||||
for (int k=0; k<size; ++k)
|
for (int k=0; k<size; ++k)
|
||||||
if (Lhs::Flags&DirectAccessBit)
|
|
||||||
// TODO to properly hanlde this workaround let's specialize Block for type having the DirectAccessBit
|
|
||||||
res += product.rhs().coeff(k) * Map<DestDerived>(&product.lhs().const_cast_derived().coeffRef(0,k),product.lhs().rows());
|
|
||||||
else
|
|
||||||
res += product.rhs().coeff(k) * product.lhs().col(k);
|
res += product.rhs().coeff(k) * product.lhs().col(k);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -527,7 +515,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,NoDirectAccess,Col
|
|||||||
// optimized cache friendly colmajor * vector path for matrix with direct access flag
|
// optimized cache friendly colmajor * vector path for matrix with direct access flag
|
||||||
// NOTE this path coul also be enabled for expressions if we add runtime align queries
|
// NOTE this path coul also be enabled for expressions if we add runtime align queries
|
||||||
template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess>
|
template<typename ProductType, int LhsRows, int RhsOrder, int RhsAccess>
|
||||||
struct ei_cache_friendly_product_selector<ProductType,LhsRows,HasDirectAccess,ColMajor,1,RhsOrder,RhsAccess>
|
struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirectAccess,1,RhsOrder,RhsAccess>
|
||||||
{
|
{
|
||||||
typedef typename ProductType::Scalar Scalar;
|
typedef typename ProductType::Scalar Scalar;
|
||||||
|
|
||||||
@ -545,7 +533,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,HasDirectAccess,Co
|
|||||||
_res = (Scalar*)alloca(sizeof(Scalar)*res.size());
|
_res = (Scalar*)alloca(sizeof(Scalar)*res.size());
|
||||||
Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
|
Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
|
||||||
}
|
}
|
||||||
ei_cache_friendly_product(res.size(),
|
ei_cache_friendly_product_colmajor_times_vector(res.size(),
|
||||||
&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
|
&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
|
||||||
product.rhs(), _res);
|
product.rhs(), _res);
|
||||||
|
|
||||||
@ -588,7 +576,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
|
|||||||
_res = (Scalar*)alloca(sizeof(Scalar)*res.size());
|
_res = (Scalar*)alloca(sizeof(Scalar)*res.size());
|
||||||
Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
|
Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
|
||||||
}
|
}
|
||||||
ei_cache_friendly_product(res.size(),
|
ei_cache_friendly_product_colmajor_times_vector(res.size(),
|
||||||
&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
|
&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
|
||||||
product.lhs().transpose(), _res);
|
product.lhs().transpose(), _res);
|
||||||
|
|
||||||
|
@ -206,6 +206,11 @@ enum {
|
|||||||
RowMajor = RowMajorBit
|
RowMajor = RowMajorBit
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
NoDirectAccess = 0,
|
||||||
|
HasDirectAccess = DirectAccessBit
|
||||||
|
};
|
||||||
|
|
||||||
const int FullyCoherentAccessPattern = 0x1;
|
const int FullyCoherentAccessPattern = 0x1;
|
||||||
const int InnerCoherentAccessPattern = 0x2 | FullyCoherentAccessPattern;
|
const int InnerCoherentAccessPattern = 0x2 | FullyCoherentAccessPattern;
|
||||||
const int OuterCoherentAccessPattern = 0x4 | InnerCoherentAccessPattern;
|
const int OuterCoherentAccessPattern = 0x4 | InnerCoherentAccessPattern;
|
||||||
|
@ -42,7 +42,8 @@ class Matrix;
|
|||||||
template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged;
|
template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged;
|
||||||
template<typename ExpressionType> class NestByValue;
|
template<typename ExpressionType> class NestByValue;
|
||||||
template<typename MatrixType> class Minor;
|
template<typename MatrixType> class Minor;
|
||||||
template<typename MatrixType, int BlockRows=Dynamic, int BlockCols=Dynamic> class Block;
|
template<typename MatrixType, int BlockRows=Dynamic, int BlockCols=Dynamic,
|
||||||
|
int DirectAccessStatus = ei_traits<MatrixType>::Flags&DirectAccessBit> class Block;
|
||||||
template<typename MatrixType> class Transpose;
|
template<typename MatrixType> class Transpose;
|
||||||
template<typename MatrixType> class Conjugate;
|
template<typename MatrixType> class Conjugate;
|
||||||
template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
|
template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
|
||||||
|
@ -82,12 +82,12 @@ int main(int argc, char *argv[])
|
|||||||
std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
|
std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
|
||||||
std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
|
std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
|
||||||
std::cout << "Usage: " << argv[0] << " check\n";
|
std::cout << "Usage: " << argv[0] << " check\n";
|
||||||
std::cout << "Options:\n"
|
std::cout << "Options:\n";
|
||||||
std::cout << " size unique size of the 2 matrices (integer)\n";
|
std::cout << " size unique size of the 2 matrices (integer)\n";
|
||||||
std::cout << " auto automatically set the number of repetitions and tries\n";
|
std::cout << " auto automatically set the number of repetitions and tries\n";
|
||||||
std::cout << " nbloops number of times the GEMM routines is executed\n"
|
std::cout << " nbloops number of times the GEMM routines is executed\n";
|
||||||
std::cout << " nbtries number of times the loop is benched (return the best try)\n"
|
std::cout << " nbtries number of times the loop is benched (return the best try)\n";
|
||||||
std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n"
|
std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
|
||||||
std::cout << " check check eigen product using cblas as a reference\n";
|
std::cout << " check check eigen product using cblas as a reference\n";
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user