Make the explicit vectorization much more flexible:

- support dynamic sizes
 - support arbitrary matrix size when the matrix can be seen as a 1D array
   (except for fixed size matrices where the size in Bytes must be a multiple of 16,
    this is to allow compact storage of a vector of matrices)
Note that the explicit vectorization is still experimental and far from being completely tested.
This commit is contained in:
Gael Guennebaud 2008-04-25 15:46:18 +00:00
parent 30d47b5250
commit a451835bce
10 changed files with 264 additions and 100 deletions

View File

@ -2,7 +2,7 @@
#define EIGEN_CORE_H #define EIGEN_CORE_H
#ifndef EIGEN_DONT_VECTORIZE #ifndef EIGEN_DONT_VECTORIZE
#ifdef __SSE2__ #if ((defined __SSE2__) && ( (!defined __GNUC__) || (__GNUC__>=4 && __GNUC_MINOR__>=2)))
#define EIGEN_VECTORIZE #define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SSE #define EIGEN_VECTORIZE_SSE
#include <emmintrin.h> #include <emmintrin.h>

View File

@ -99,7 +99,11 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic>
template <typename Derived, typename OtherDerived, template <typename Derived, typename OtherDerived,
bool Vectorize = (Derived::Flags & OtherDerived::Flags & VectorizableBit) bool Vectorize = (Derived::Flags & OtherDerived::Flags & VectorizableBit)
&& ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))> && ((Derived::Flags&RowMajorBit)==(OtherDerived::Flags&RowMajorBit))
&& ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
||((Derived::Flags&RowMajorBit)
? Derived::ColsAtCompileTime!=Dynamic && (Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size==0)
: Derived::RowsAtCompileTime!=Dynamic && (Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size==0)) )>
struct ei_assignment_impl; struct ei_assignment_impl;
template<typename Derived> template<typename Derived>
@ -107,6 +111,7 @@ template<typename OtherDerived>
Derived& MatrixBase<Derived> Derived& MatrixBase<Derived>
::lazyAssign(const MatrixBase<OtherDerived>& other) ::lazyAssign(const MatrixBase<OtherDerived>& other)
{ {
// std::cout << "lazyAssign = " << Derived::Flags << " " << OtherDerived::Flags << "\n";
ei_assignment_impl<Derived,OtherDerived>::execute(derived(),other.derived()); ei_assignment_impl<Derived,OtherDerived>::execute(derived(),other.derived());
return derived(); return derived();
} }
@ -178,6 +183,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
if(unroll) if(unroll)
{ {
// std::cout << "vectorized unrolled\n";
ei_matrix_assignment_packet_unroller ei_matrix_assignment_packet_unroller
<Derived, OtherDerived, <Derived, OtherDerived,
unroll && int(Derived::SizeAtCompileTime)>=ei_packet_traits<typename Derived::Scalar>::size unroll && int(Derived::SizeAtCompileTime)>=ei_packet_traits<typename Derived::Scalar>::size
@ -188,15 +194,61 @@ struct ei_assignment_impl<Derived, OtherDerived, true>
{ {
if(OtherDerived::Flags&RowMajorBit) if(OtherDerived::Flags&RowMajorBit)
{ {
for(int i = 0; i < dst.rows(); i++) if ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size) && (Derived::ColsAtCompileTime==Dynamic
|| Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
{
// std::cout << "vectorized linear row major\n";
const int size = dst.rows() * dst.cols();
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
int index = 0;
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
{
// FIXME the following is not really efficient
int i = index/dst.rows();
int j = index%dst.rows();
dst.writePacketCoeff(i, j, src.packetCoeff(i, j)); dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
}
for(int i = alignedSize/dst.rows(); i < dst.rows(); i++)
for(int j = alignedSize%dst.rows(); j < dst.cols(); j++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
else
{
// std::cout << "vectorized normal row major\n";
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
}
} }
else else
{ {
for(int j = 0; j < dst.cols(); j++) if ((Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size) && ( Derived::RowsAtCompileTime==Dynamic
|| Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
{
// std::cout << "vectorized linear col major\n";
const int size = dst.rows() * dst.cols();
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
int index = 0;
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
{
// FIXME the following is not really efficient
int i = index%dst.rows();
int j = index/dst.rows();
dst.writePacketCoeff(i, j, src.packetCoeff(i, j)); dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
}
for(int j = alignedSize/dst.rows(); j < dst.cols(); j++)
for(int i = alignedSize%dst.rows(); i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
else
{
// std::cout << "vectorized normal col major\n";
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
dst.writePacketCoeff(i, j, src.packetCoeff(i, j));
}
} }
} }
} }

View File

@ -31,8 +31,8 @@
* *
* \param NullaryOp template functor implementing the operator * \param NullaryOp template functor implementing the operator
* *
* This class represents an expression of a generic zeroary operator. * This class represents an expression of a generic nullary operator.
* It is the return type of the ones(), zero(), constant() and random() functions, * It is the return type of the ones(), zero(), constant(), identity() and random() functions,
* and most of the time this is the only way it is used. * and most of the time this is the only way it is used.
* *
* However, if you want to write a function returning such an expression, you * However, if you want to write a function returning such an expression, you
@ -94,12 +94,18 @@ class CwiseNullaryOp : ei_no_assignment_operator,
}; };
/* \returns an expression of a custom coefficient-wise operator \a func of *this and \a other /** \returns an expression of a matrix defined by a custom functor \a func
* *
* The template parameter \a CustomNullaryOp is the type of the functor * The parameters \a rows and \a cols are the number of rows and of columns of
* of the custom operator (see class CwiseNullaryOp for an example) * the returned matrix. Must be compatible with this MatrixBase type.
* *
* \sa class CwiseNullaryOp, MatrixBase::operator+, MatrixBase::operator-, MatrixBase::cwiseProduct, MatrixBase::cwiseQuotient * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so cwiseCreate(const CustomNullaryOp&) should be used
* instead.
*
* The template parameter \a CustomNullaryOp is the type of the functor.
*
* \sa class CwiseNullaryOp
*/ */
template<typename Derived> template<typename Derived>
template<typename CustomNullaryOp> template<typename CustomNullaryOp>
@ -109,6 +115,21 @@ MatrixBase<Derived>::cwiseCreate(int rows, int cols, const CustomNullaryOp& func
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func); return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
} }
/** \returns an expression of a matrix defined by a custom functor \a func
*
* The parameter \a size is the size of the returned vector.
* Must be compatible with this MatrixBase type.
*
* \only_for_vectors
*
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
* it is redundant to pass \a size as argument, so cwiseCreate(const CustomNullaryOp&) should be used
* instead.
*
* The template parameter \a CustomNullaryOp is the type of the functor.
*
* \sa class CwiseNullaryOp
*/
template<typename Derived> template<typename Derived>
template<typename CustomNullaryOp> template<typename CustomNullaryOp>
const CwiseNullaryOp<CustomNullaryOp, Derived> const CwiseNullaryOp<CustomNullaryOp, Derived>
@ -119,6 +140,15 @@ MatrixBase<Derived>::cwiseCreate(int size, const CustomNullaryOp& func)
else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func); else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
} }
/** \returns an expression of a matrix defined by a custom functor \a func
*
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
* need to use the variants taking size arguments.
*
* The template parameter \a CustomNullaryOp is the type of the functor.
*
* \sa class CwiseNullaryOp
*/
template<typename Derived> template<typename Derived>
template<typename CustomNullaryOp> template<typename CustomNullaryOp>
const CwiseNullaryOp<CustomNullaryOp, Derived> const CwiseNullaryOp<CustomNullaryOp, Derived>
@ -127,7 +157,16 @@ MatrixBase<Derived>::cwiseCreate(const CustomNullaryOp& func)
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows(), cols(), func); return CwiseNullaryOp<CustomNullaryOp, Derived>(rows(), cols(), func);
} }
/* \returns an expression of the coefficient-wise \< operator of *this and \a other /** \returns an expression of a constant matrix of value \a value
*
* The parameters \a rows and \a cols are the number of rows and of columns of
* the returned matrix. Must be compatible with this MatrixBase type.
*
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
* it is redundant to pass \a rows and \a cols as arguments, so constant(const Scalar&) should be used
* instead.
*
* The template parameter \a CustomNullaryOp is the type of the functor.
* *
* \sa class CwiseNullaryOp * \sa class CwiseNullaryOp
*/ */
@ -138,6 +177,21 @@ MatrixBase<Derived>::constant(int rows, int cols, const Scalar& value)
return cwiseCreate(rows, cols, ei_scalar_constant_op<Scalar>(value)); return cwiseCreate(rows, cols, ei_scalar_constant_op<Scalar>(value));
} }
/** \returns an expression of a constant matrix of value \a value
*
* The parameter \a size is the size of the returned vector.
* Must be compatible with this MatrixBase type.
*
* \only_for_vectors
*
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
* it is redundant to pass \a size as argument, so constant(const Scalar&) should be used
* instead.
*
* The template parameter \a CustomNullaryOp is the type of the functor.
*
* \sa class CwiseNullaryOp
*/
template<typename Derived> template<typename Derived>
const CwiseNullaryOp<ei_scalar_constant_op<typename ei_traits<Derived>::Scalar>, Derived> const CwiseNullaryOp<ei_scalar_constant_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::constant(int size, const Scalar& value) MatrixBase<Derived>::constant(int size, const Scalar& value)
@ -145,6 +199,15 @@ MatrixBase<Derived>::constant(int size, const Scalar& value)
return cwiseCreate(size, ei_scalar_constant_op<Scalar>(value)); return cwiseCreate(size, ei_scalar_constant_op<Scalar>(value));
} }
/** \returns an expression of a constant matrix of value \a value
*
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
* need to use the variants taking size arguments.
*
* The template parameter \a CustomNullaryOp is the type of the functor.
*
* \sa class CwiseNullaryOp
*/
template<typename Derived> template<typename Derived>
const CwiseNullaryOp<ei_scalar_constant_op<typename ei_traits<Derived>::Scalar>, Derived> const CwiseNullaryOp<ei_scalar_constant_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::constant(const Scalar& value) MatrixBase<Derived>::constant(const Scalar& value)
@ -163,6 +226,10 @@ bool MatrixBase<Derived>::isEqualToConstant
return true; return true;
} }
/** Sets all coefficients in this expression to \a value.
*
* \sa class CwiseNullaryOp, zero(), ones()
*/
template<typename Derived> template<typename Derived>
Derived& MatrixBase<Derived>::setConstant(const Scalar& value) Derived& MatrixBase<Derived>::setConstant(const Scalar& value)
{ {
@ -238,7 +305,7 @@ MatrixBase<Derived>::zero()
* Example: \include MatrixBase_isZero.cpp * Example: \include MatrixBase_isZero.cpp
* Output: \verbinclude MatrixBase_isZero.out * Output: \verbinclude MatrixBase_isZero.out
* *
* \sa class Zero, zero() * \sa class CwiseNullaryOp, zero()
*/ */
template<typename Derived> template<typename Derived>
bool MatrixBase<Derived>::isZero bool MatrixBase<Derived>::isZero
@ -256,7 +323,7 @@ bool MatrixBase<Derived>::isZero
* Example: \include MatrixBase_setZero.cpp * Example: \include MatrixBase_setZero.cpp
* Output: \verbinclude MatrixBase_setZero.out * Output: \verbinclude MatrixBase_setZero.out
* *
* \sa class Zero, zero() * \sa class CwiseNullaryOp, zero()
*/ */
template<typename Derived> template<typename Derived>
Derived& MatrixBase<Derived>::setZero() Derived& MatrixBase<Derived>::setZero()
@ -333,7 +400,7 @@ MatrixBase<Derived>::ones()
* Example: \include MatrixBase_isOnes.cpp * Example: \include MatrixBase_isOnes.cpp
* Output: \verbinclude MatrixBase_isOnes.out * Output: \verbinclude MatrixBase_isOnes.out
* *
* \sa class Ones, ones() * \sa class CwiseNullaryOp, ones()
*/ */
template<typename Derived> template<typename Derived>
bool MatrixBase<Derived>::isOnes bool MatrixBase<Derived>::isOnes
@ -347,7 +414,7 @@ bool MatrixBase<Derived>::isOnes
* Example: \include MatrixBase_setOnes.cpp * Example: \include MatrixBase_setOnes.cpp
* Output: \verbinclude MatrixBase_setOnes.out * Output: \verbinclude MatrixBase_setOnes.out
* *
* \sa class Ones, ones() * \sa class CwiseNullaryOp, ones()
*/ */
template<typename Derived> template<typename Derived>
Derived& MatrixBase<Derived>::setOnes() Derived& MatrixBase<Derived>::setOnes()
@ -424,7 +491,7 @@ MatrixBase<Derived>::random()
* Example: \include MatrixBase_setRandom.cpp * Example: \include MatrixBase_setRandom.cpp
* Output: \verbinclude MatrixBase_setRandom.out * Output: \verbinclude MatrixBase_setRandom.out
* *
* \sa class Random, ei_random() * \sa class CwiseNullaryOp, ei_random()
*/ */
template<typename Derived> template<typename Derived>
Derived& MatrixBase<Derived>::setRandom() Derived& MatrixBase<Derived>::setRandom()
@ -479,7 +546,7 @@ MatrixBase<Derived>::identity()
* Example: \include MatrixBase_isIdentity.cpp * Example: \include MatrixBase_isIdentity.cpp
* Output: \verbinclude MatrixBase_isIdentity.out * Output: \verbinclude MatrixBase_isIdentity.out
* *
* \sa class Identity, identity(), identity(int,int), setIdentity() * \sa class CwiseNullaryOp, identity(), identity(int,int), setIdentity()
*/ */
template<typename Derived> template<typename Derived>
bool MatrixBase<Derived>::isIdentity bool MatrixBase<Derived>::isIdentity
@ -509,7 +576,7 @@ bool MatrixBase<Derived>::isIdentity
* Example: \include MatrixBase_setIdentity.cpp * Example: \include MatrixBase_setIdentity.cpp
* Output: \verbinclude MatrixBase_setIdentity.out * Output: \verbinclude MatrixBase_setIdentity.out
* *
* \sa class Identity, identity(), identity(int,int), isIdentity() * \sa class CwiseNullaryOp, identity(), identity(int,int), isIdentity()
*/ */
template<typename Derived> template<typename Derived>
Derived& MatrixBase<Derived>::setIdentity() Derived& MatrixBase<Derived>::setIdentity()

View File

@ -72,6 +72,11 @@ template<typename ExpressionType> class Lazy
return m_expression.coeff(row, col); return m_expression.coeff(row, col);
} }
/** \internal
  * \returns the packet obtained from the wrapped expression at position
  * (\a row, \a col); the call is forwarded unchanged to
  * m_expression.packetCoeff().
  */
PacketScalar _packetCoeff(int row, int col) const
{
return m_expression.packetCoeff(row, col);
}
protected: protected:
const typename ExpressionType::Nested m_expression; const typename ExpressionType::Nested m_expression;
}; };

View File

@ -79,7 +79,7 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _SuggestedFlags, _MaxRows, _MaxCo
ColsAtCompileTime = _Cols, ColsAtCompileTime = _Cols,
MaxRowsAtCompileTime = _MaxRows, MaxRowsAtCompileTime = _MaxRows,
MaxColsAtCompileTime = _MaxCols, MaxColsAtCompileTime = _MaxCols,
Flags = ei_corrected_matrix_flags<_Scalar, _Rows, _Cols, _SuggestedFlags>::ret, Flags = ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _SuggestedFlags>::ret,
CoeffReadCost = NumTraits<Scalar>::ReadCost CoeffReadCost = NumTraits<Scalar>::ReadCost
}; };
}; };

View File

@ -75,11 +75,8 @@ template<typename Derived> class MatrixBase
* it is set to the \a Dynamic constant. * it is set to the \a Dynamic constant.
* \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */
SizeAtCompileTime SizeAtCompileTime = ei_size_at_compile_time<ei_traits<Derived>::RowsAtCompileTime,
= ei_traits<Derived>::RowsAtCompileTime == Dynamic ei_traits<Derived>::ColsAtCompileTime>::ret,
|| ei_traits<Derived>::ColsAtCompileTime == Dynamic
? Dynamic
: ei_traits<Derived>::RowsAtCompileTime * ei_traits<Derived>::ColsAtCompileTime,
/**< This is equal to the number of coefficients, i.e. the number of /**< This is equal to the number of coefficients, i.e. the number of
* rows times the number of columns, or to \a Dynamic if this is not * rows times the number of columns, or to \a Dynamic if this is not
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
@ -106,11 +103,8 @@ template<typename Derived> class MatrixBase
* \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime * \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime
*/ */
MaxSizeAtCompileTime MaxSizeAtCompileTime = ei_size_at_compile_time<ei_traits<Derived>::MaxRowsAtCompileTime,
= ei_traits<Derived>::MaxRowsAtCompileTime == Dynamic ei_traits<Derived>::MaxColsAtCompileTime>::ret,
|| ei_traits<Derived>::MaxColsAtCompileTime == Dynamic
? Dynamic
: ei_traits<Derived>::MaxRowsAtCompileTime * ei_traits<Derived>::MaxColsAtCompileTime,
/**< This value is equal to the maximum possible number of coefficients that this expression /**< This value is equal to the maximum possible number of coefficients that this expression
* might have. If this expression might have an arbitrarily high number of coefficients, * might have. If this expression might have an arbitrarily high number of coefficients,
* this value is set to \a Dynamic. * this value is set to \a Dynamic.

View File

@ -49,6 +49,28 @@ template <typename T, int Size> struct ei_aligned_array<T,Size,false>
T array[Size]; T array[Size];
}; };
/** \internal
  * Allocates storage for \a size objects of type \c T.
  *
  * When EIGEN_VECTORIZE is defined and \c T has a packet type holding more than
  * one coefficient, the buffer is obtained from _mm_malloc() with a 16-byte
  * alignment request. In every other case a plain \c new[] array is returned.
  *
  * NOTE(review): the _mm_malloc() result is returned without a null check.
  *
  * \param size number of elements (not bytes) to allocate
  * \returns a pointer to the first element; release it with ei_aligned_free()
  */
template<typename T>
T* ei_aligned_malloc(size_t size)
{
#ifdef EIGEN_VECTORIZE
  // Only scalar types with a real packet type take the aligned path.
  const bool hasPackets = ei_packet_traits<T>::size > 1;
  if (hasPackets)
  {
    void* const raw = _mm_malloc(sizeof(T)*size, 16);
    return static_cast<T*>(raw);
  }
#endif
  return new T[size];
}
/** \internal
  * Releases a buffer with the deallocation routine matching the branch that
  * ei_aligned_malloc() takes for the same \c T.
  *
  * When EIGEN_VECTORIZE is defined and \c T has a packet type holding more than
  * one coefficient, the buffer is released with _mm_free(); otherwise it is
  * released with \c delete[].
  *
  * \param ptr pointer to the buffer to release
  */
template<typename T>
void ei_aligned_free(T* ptr)
{
#ifdef EIGEN_VECTORIZE
  // Same criterion as the allocation side: packet-capable scalars only.
  const bool hasPackets = ei_packet_traits<T>::size > 1;
  if (hasPackets)
  {
    _mm_free(ptr);
    return;
  }
#endif
  delete[] ptr;
}
// purely fixed-size matrix // purely fixed-size matrix
template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage template<typename T, int Size, int _Rows, int _Cols> class ei_matrix_storage
{ {
@ -127,7 +149,7 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
int m_cols; int m_cols;
public: public:
ei_matrix_storage(int size, int rows, int cols) ei_matrix_storage(int size, int rows, int cols)
: m_data(new T[size]), m_rows(rows), m_cols(cols) {} : m_data(ei_aligned_malloc<T>(size)), m_rows(rows), m_cols(cols) {}
~ei_matrix_storage() { delete[] m_data; } ~ei_matrix_storage() { delete[] m_data; }
int rows(void) const {return m_rows;} int rows(void) const {return m_rows;}
int cols(void) const {return m_cols;} int cols(void) const {return m_cols;}
@ -135,8 +157,8 @@ template<typename T> class ei_matrix_storage<T, Dynamic, Dynamic, Dynamic>
{ {
if(size != m_rows*m_cols) if(size != m_rows*m_cols)
{ {
delete[] m_data; ei_aligned_free(m_data);
m_data = new T[size]; m_data = ei_aligned_malloc<T>(size);
} }
m_rows = rows; m_rows = rows;
m_cols = cols; m_cols = cols;
@ -151,7 +173,7 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
T *m_data; T *m_data;
int m_cols; int m_cols;
public: public:
ei_matrix_storage(int size, int, int cols) : m_data(new T[size]), m_cols(cols) {} ei_matrix_storage(int size, int, int cols) : m_data(ei_aligned_malloc<T>(size)), m_cols(cols) {}
~ei_matrix_storage() { delete[] m_data; } ~ei_matrix_storage() { delete[] m_data; }
static int rows(void) {return _Rows;} static int rows(void) {return _Rows;}
int cols(void) const {return m_cols;} int cols(void) const {return m_cols;}
@ -159,8 +181,8 @@ template<typename T, int _Rows> class ei_matrix_storage<T, Dynamic, _Rows, Dynam
{ {
if(size != _Rows*m_cols) if(size != _Rows*m_cols)
{ {
delete[] m_data; ei_aligned_free(m_data);
m_data = new T[size]; m_data = ei_aligned_malloc<T>(size);
} }
m_cols = cols; m_cols = cols;
} }
@ -174,7 +196,7 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co
T *m_data; T *m_data;
int m_rows; int m_rows;
public: public:
ei_matrix_storage(int size, int rows, int) : m_data(new T[size]), m_rows(rows) {} ei_matrix_storage(int size, int rows, int) : m_data(ei_aligned_malloc<T>(size)), m_rows(rows) {}
~ei_matrix_storage() { delete[] m_data; } ~ei_matrix_storage() { delete[] m_data; }
int rows(void) const {return m_rows;} int rows(void) const {return m_rows;}
static int cols(void) {return _Cols;} static int cols(void) {return _Cols;}
@ -182,8 +204,8 @@ template<typename T, int _Cols> class ei_matrix_storage<T, Dynamic, Dynamic, _Co
{ {
if(size != m_rows*_Cols) if(size != m_rows*_Cols)
{ {
delete[] m_data; ei_aligned_free(m_data);
m_data = new T[size]; m_data = ei_aligned_malloc<T>(size);
} }
m_rows = rows; m_rows = rows;
} }

View File

@ -135,7 +135,7 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
| EvalBeforeAssigningBit | EvalBeforeAssigningBit
| (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimalProduct ? EvalBeforeNestingBit : 0)) | (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimalProduct ? EvalBeforeNestingBit : 0))
& ( & (
~(RowMajorBit | VectorizableBit) ~(RowMajorBit | VectorizableBit | Like1DArrayBit)
| ( | (
( (
!(Lhs::Flags & RowMajorBit) && (Lhs::Flags & VectorizableBit) !(Lhs::Flags & RowMajorBit) && (Lhs::Flags & VectorizableBit)
@ -178,7 +178,11 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
/** \internal */ /** \internal */
template<typename DestDerived> template<typename DestDerived>
void _cacheOptimalEval(DestDerived& res) const; void _cacheOptimalEval(DestDerived& res, ei_meta_false) const;
#ifdef EIGEN_VECTORIZE
template<typename DestDerived>
void _cacheOptimalEval(DestDerived& res, ei_meta_true) const;
#endif
private: private:
@ -267,59 +271,29 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
} }
template<typename Derived> template<typename Derived>
template<typename Derived1, typename Derived2> template<typename Lhs, typename Rhs>
Derived& MatrixBase<Derived>::lazyAssign(const Product<Derived1,Derived2,CacheOptimalProduct>& product) Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheOptimalProduct>& product)
{ {
product._cacheOptimalEval(*this); product._cacheOptimalEval(*this,
#ifdef EIGEN_VECTORIZE
typename ei_meta_if<(Flags & VectorizableBit)
&& (!(Lhs::Flags & RowMajorBit)
&& (Lhs::RowsAtCompileTime!=Dynamic)
&& (Lhs::RowsAtCompileTime%ei_packet_traits<Scalar>::size==0) ),
ei_meta_true,ei_meta_false>::ret()
#else
ei_meta_false
#endif
);
return derived(); return derived();
} }
template<typename Lhs, typename Rhs, int EvalMode> template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived> template<typename DestDerived>
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_false) const
{ {
res.setZero(); res.setZero();
const int cols4 = m_lhs.cols() & 0xfffffffC; const int cols4 = m_lhs.cols() & 0xfffffffC;
#ifdef EIGEN_VECTORIZE
if( (Flags & VectorizableBit) && (!(Lhs::Flags & RowMajorBit)) )
{
for(int k=0; k<this->cols(); k++)
{
int j=0;
for(; j<cols4; j+=4)
{
const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
{
res.writePacketCoeff(i,k,\
ei_padd(
res.packetCoeff(i,k),
ei_padd(
ei_padd(
ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
ei_padd(
ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
)
)
)
);
}
}
for(; j<m_lhs.cols(); ++j)
{
const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
for (int i=0; i<this->rows(); ++i)
res.writePacketCoeff(i,k,ei_pmul(tmp, m_lhs.packetCoeff(i,j)));
}
}
}
else
#endif // EIGEN_VECTORIZE
{ {
for(int k=0; k<this->cols(); ++k) for(int k=0; k<this->cols(); ++k)
{ {
@ -344,4 +318,48 @@ void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
} }
} }
#ifdef EIGEN_VECTORIZE
/** \internal
  * Packet (SIMD) variant of the cache-optimal product evaluation: computes
  * \a res = lhs * rhs one destination column at a time.
  *
  * For each column \c k of the result, the columns of the left-hand side are
  * combined four at a time: each one is scaled by the matching coefficient of
  * column \c k of the right-hand side (broadcast with ei_pset1) and accumulated
  * into \a res packet by packet. Columns of the left-hand side left over when
  * the inner dimension is not a multiple of 4 are accumulated one by one.
  *
  * Rows are always traversed by whole packets, so the number of rows has to be
  * a multiple of ei_packet_traits<Scalar>::size; the lazyAssign() dispatch only
  * selects this overload for a column-major left-hand side whose fixed row
  * count satisfies that.
  *
  * \param res destination expression; it is zeroed first, then accumulated into
  */
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived>
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res, ei_meta_true) const
{
  res.setZero();
  // Largest multiple of 4 that does not exceed the inner dimension.
  const int cols4 = m_lhs.cols() & 0xfffffffC;
  for(int k=0; k<this->cols(); k++)
  {
    int j=0;
    // Main part: process the inner dimension by blocks of 4 lhs columns.
    for(; j<cols4; j+=4)
    {
      const typename ei_packet_traits<Scalar>::type tmp0 = ei_pset1(m_rhs.coeff(j+0,k));
      const typename ei_packet_traits<Scalar>::type tmp1 = ei_pset1(m_rhs.coeff(j+1,k));
      const typename ei_packet_traits<Scalar>::type tmp2 = ei_pset1(m_rhs.coeff(j+2,k));
      const typename ei_packet_traits<Scalar>::type tmp3 = ei_pset1(m_rhs.coeff(j+3,k));
      for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
      {
        res.writePacketCoeff(i,k,
          ei_padd(
            res.packetCoeff(i,k),
            ei_padd(
              ei_padd(
                ei_pmul(tmp0, m_lhs.packetCoeff(i,j)),
                ei_pmul(tmp1, m_lhs.packetCoeff(i,j+1))),
              ei_padd(
                ei_pmul(tmp2, m_lhs.packetCoeff(i,j+2)),
                ei_pmul(tmp3, m_lhs.packetCoeff(i,j+3))
              )
            )
          )
        );
      }
    }
    // Tail: the (at most 3) remaining lhs columns. As in the main part, rows
    // advance by one whole packet and the contribution is ADDED to what is
    // already stored in res; stepping row by row or overwriting would clobber
    // the partial sums accumulated above and write packets at unaligned rows.
    for(; j<m_lhs.cols(); ++j)
    {
      const typename ei_packet_traits<Scalar>::type tmp = ei_pset1(m_rhs.coeff(j,k));
      for (int i=0; i<this->rows(); i+=ei_packet_traits<Scalar>::size)
        res.writePacketCoeff(i,k,
          ei_padd(res.packetCoeff(i,k), ei_pmul(tmp, m_lhs.packetCoeff(i,j))));
    }
  }
}
#endif // EIGEN_VECTORIZE
#endif // EIGEN_PRODUCT_H #endif // EIGEN_PRODUCT_H

View File

@ -71,6 +71,11 @@ template<typename ExpressionType> class Temporary
return m_expression.coeff(row, col); return m_expression.coeff(row, col);
} }
/** \internal
  * \returns the packet obtained from the wrapped expression at position
  * (\a row, \a col); the call is forwarded unchanged to
  * m_expression.packetCoeff().
  */
PacketScalar _packetCoeff(int row, int col) const
{
return m_expression.packetCoeff(row, col);
}
protected: protected:
const ExpressionType m_expression; const ExpressionType m_expression;
}; };

View File

@ -70,6 +70,9 @@ struct ei_meta_if <false, Then, Else> { typedef Else ret; };
template<typename T, typename U> struct ei_is_same_type { enum { ret = 0 }; }; template<typename T, typename U> struct ei_is_same_type { enum { ret = 0 }; };
template<typename T> struct ei_is_same_type<T,T> { enum { ret = 1 }; }; template<typename T> struct ei_is_same_type<T,T> { enum { ret = 1 }; };
/** \internal
  * Empty tag types standing for a compile-time "true" / "false". They carry no
  * data and are passed as an extra argument to choose between overloads, as
  * done for the two Product::_cacheOptimalEval() overloads.
  */
struct ei_meta_true {};
struct ei_meta_false {};
/** \internal /** \internal
* Convenient struct to get the result type of a unary or binary functor. * Convenient struct to get the result type of a unary or binary functor.
@ -145,19 +148,12 @@ template<typename T> struct ei_packet_traits
enum {size=1}; enum {size=1};
}; };
template<typename Scalar, int Rows, int Cols, unsigned int SuggestedFlags> template<typename Scalar, int Size, unsigned int SuggestedFlags>
class ei_corrected_matrix_flags class ei_corrected_matrix_flags
{ {
enum { is_vectorizable enum { is_vectorizable
= ei_packet_traits<Scalar>::size > 1 = ei_packet_traits<Scalar>::size > 1
&& Rows!=Dynamic && (Size%ei_packet_traits<Scalar>::size==0),
&& Cols!=Dynamic
&&
(
SuggestedFlags&RowMajorBit
? Cols%ei_packet_traits<Scalar>::size==0
: Rows%ei_packet_traits<Scalar>::size==0
),
_flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit _flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit
}; };
@ -168,19 +164,24 @@ class ei_corrected_matrix_flags
}; };
}; };
/** \internal
  * Compile-time number of coefficients for the given dimensions.
  *
  * \c ret is \a _Rows * \a _Cols when both dimensions are known at compile
  * time, and \c Dynamic as soon as at least one of them is \c Dynamic.
  */
template<int _Rows, int _Cols> struct ei_size_at_compile_time
{
  enum { ret = (_Rows!=Dynamic && _Cols!=Dynamic) ? _Rows * _Cols : Dynamic };
};
template<typename T> class ei_eval template<typename T> class ei_eval
{ {
typedef typename ei_traits<T>::Scalar _Scalar; typedef typename ei_traits<T>::Scalar _Scalar;
enum { _Rows = ei_traits<T>::RowsAtCompileTime, enum {_MaxRows = ei_traits<T>::MaxRowsAtCompileTime,
_Cols = ei_traits<T>::ColsAtCompileTime, _MaxCols = ei_traits<T>::MaxColsAtCompileTime,
_Flags = ei_traits<T>::Flags _Flags = ei_traits<T>::Flags
}; };
public: public:
typedef Matrix<_Scalar, typedef Matrix<_Scalar,
_Rows, ei_traits<T>::RowsAtCompileTime,
_Cols, ei_traits<T>::ColsAtCompileTime,
ei_corrected_matrix_flags<_Scalar, _Rows, _Cols, _Flags>::ret, ei_corrected_matrix_flags<_Scalar, ei_size_at_compile_time<_MaxRows,_MaxCols>::ret, _Flags>::ret,
ei_traits<T>::MaxRowsAtCompileTime, ei_traits<T>::MaxRowsAtCompileTime,
ei_traits<T>::MaxColsAtCompileTime> type; ei_traits<T>::MaxColsAtCompileTime> type;
}; };