* support for matrix-scalar quotient with integer scalar types.

* added a cache-efficient matrix-matrix product.
   - provides a huge speed-up for large matrices.
   - currently it is enabled only when explicit unrolling is not possible.
This commit is contained in:
Gael Guennebaud 2008-03-21 20:26:14 +00:00
parent 0ef1efdbdb
commit 4342f024d9
7 changed files with 116 additions and 22 deletions

View File

@ -208,6 +208,34 @@ struct ei_scalar_multiple_op {
const Scalar m_other; const Scalar m_other;
}; };
/** \internal
 * \brief Implementation helper dividing a scalar by a fixed divisor.
 *
 * Generic version: the reciprocal of the divisor is computed once, at
 * construction time, and each call multiplies its argument by that cached
 * reciprocal. The reciprocal is computed in \c Scalar arithmetic, so with an
 * integer \c Scalar it is truncated; the specialization for
 * \c HasFloatingPoint == false avoids this by performing a real division.
 */
template<typename Scalar, bool HasFloatingPoint>
struct ei_scalar_quotient1_impl
{
  ei_scalar_quotient1_impl(const Scalar& other)
    : m_other(static_cast<Scalar>(1) / other)
  {}

  /** \returns \a value multiplied by the cached reciprocal */
  Scalar operator() (const Scalar& value) const
  {
    return value * m_other;
  }

  /** reciprocal of the divisor given to the constructor */
  const Scalar m_other;
};

/** \internal
 * \brief Specialization used when \c HasFloatingPoint is false.
 *
 * The divisor itself is stored and each call performs the division
 * directly, using the division operator of \c Scalar.
 */
template<typename Scalar>
struct ei_scalar_quotient1_impl<Scalar, false>
{
  ei_scalar_quotient1_impl(const Scalar& other)
    : m_other(other)
  {}

  /** \returns \a value divided by the stored divisor */
  Scalar operator() (const Scalar& value) const
  {
    return value / m_other;
  }

  /** divisor given to the constructor */
  const Scalar m_other;
};
/** \internal
 * \brief Template functor to divide a scalar by a fixed other one
 *
 * This functor implements the quotient of a matrix by a scalar. All the work
 * is inherited from ei_scalar_quotient1_impl, which is selected through
 * NumTraits<Scalar>::HasFloatingPoint: when that flag is true the argument is
 * multiplied by the reciprocal of the divisor (computed once at
 * construction), otherwise a plain division is performed on each call.
 *
 * \sa class CwiseUnaryOp, MatrixBase::operator/
 */
template<typename Scalar>
struct ei_scalar_quotient1_op
  : public ei_scalar_quotient1_impl<Scalar, NumTraits<Scalar>::HasFloatingPoint>
{
  /** Forwards the divisor \a other to the selected implementation. */
  ei_scalar_quotient1_op(const Scalar& other)
    : ei_scalar_quotient1_impl<Scalar, NumTraits<Scalar>::HasFloatingPoint>(other)
  {}
};
/** \relates MatrixBase */ /** \relates MatrixBase */
template<typename Derived> template<typename Derived>
const CwiseUnaryOp<ei_scalar_multiple_op<typename ei_traits<Derived>::Scalar>, Derived> const CwiseUnaryOp<ei_scalar_multiple_op<typename ei_traits<Derived>::Scalar>, Derived>
@ -219,12 +247,11 @@ MatrixBase<Derived>::operator*(const Scalar& scalar) const
/** \relates MatrixBase */ /** \relates MatrixBase */
template<typename Derived> template<typename Derived>
const CwiseUnaryOp<ei_scalar_multiple_op<typename ei_traits<Derived>::Scalar>, Derived> const CwiseUnaryOp<ei_scalar_quotient1_op<typename ei_traits<Derived>::Scalar>, Derived>
MatrixBase<Derived>::operator/(const Scalar& scalar) const MatrixBase<Derived>::operator/(const Scalar& scalar) const
{ {
assert(NumTraits<Scalar>::HasFloatingPoint); return CwiseUnaryOp<ei_scalar_quotient1_op<Scalar>, Derived>
return CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived> (derived(), ei_scalar_quotient1_op<Scalar>(scalar));
(derived(), ei_scalar_multiple_op<Scalar>(static_cast<Scalar>(1) / scalar));
} }
template<typename Derived> template<typename Derived>

View File

@ -26,6 +26,7 @@
#define EIGEN_FORWARDDECLARATIONS_H #define EIGEN_FORWARDDECLARATIONS_H
template<typename T> struct ei_traits; template<typename T> struct ei_traits;
template<typename Lhs, typename Rhs> struct ei_product_eval_mode;
template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols> class Matrix; template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols> class Matrix;
template<typename MatrixType> class MatrixRef; template<typename MatrixType> class MatrixRef;
@ -35,7 +36,7 @@ template<typename MatrixType> class Transpose;
template<typename MatrixType> class Conjugate; template<typename MatrixType> class Conjugate;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp; template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp; template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
template<typename Lhs, typename Rhs> class Product; template<typename Lhs, typename Rhs, int EvalMode=ei_product_eval_mode<Lhs,Rhs>::EvalMode > class Product;
template<typename MatrixType> class Random; template<typename MatrixType> class Random;
template<typename MatrixType> class Zero; template<typename MatrixType> class Zero;
template<typename MatrixType> class Ones; template<typename MatrixType> class Ones;
@ -60,9 +61,10 @@ struct ei_scalar_exp_op;
struct ei_scalar_log_op; struct ei_scalar_log_op;
struct ei_scalar_cos_op; struct ei_scalar_cos_op;
struct ei_scalar_sin_op; struct ei_scalar_sin_op;
template<typename Scalar> struct ei_scalar_pow_op; template<typename Scalar> struct ei_scalar_pow_op;
template<typename NewType> struct ei_scalar_cast_op; template<typename NewType> struct ei_scalar_cast_op;
template<typename Scalar> struct ei_scalar_multiple_op; template<typename Scalar> struct ei_scalar_multiple_op;
template<typename Scalar> struct ei_scalar_quotient1_op;
struct ei_scalar_min_op; struct ei_scalar_min_op;
struct ei_scalar_max_op; struct ei_scalar_max_op;

View File

@ -162,7 +162,7 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols,
resize(other.size(), 1); resize(other.size(), 1);
} }
else resize(other.rows(), other.cols()); else resize(other.rows(), other.cols());
return MatrixBase<Matrix>::operator=(other); return Base::operator=(other.derived());
} }
/** This is a special case of the templated operator=. Its purpose is to /** This is a special case of the templated operator=. Its purpose is to

View File

@ -170,6 +170,10 @@ template<typename Derived> class MatrixBase
return this->operator=<Derived>(other); return this->operator=<Derived>(other);
} }
/** Overloaded for optimal product evaluation */
template<typename Derived1, typename Derived2>
Derived& operator=(const Product<Derived1,Derived2,CacheOptimal>& product);
CommaInitializer operator<< (const Scalar& s); CommaInitializer operator<< (const Scalar& s);
template<typename OtherDerived> template<typename OtherDerived>
@ -223,7 +227,7 @@ template<typename Derived> class MatrixBase
Derived& operator/=(const Scalar& other); Derived& operator/=(const Scalar& other);
const CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived> operator*(const Scalar& scalar) const; const CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived> operator*(const Scalar& scalar) const;
const CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived> operator/(const Scalar& scalar) const; const CwiseUnaryOp<ei_scalar_quotient1_op<Scalar>, Derived> operator/(const Scalar& scalar) const;
friend const CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived> friend const CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, Derived>
operator*(const Scalar& scalar, const MatrixBase& matrix) operator*(const Scalar& scalar, const MatrixBase& matrix)

View File

@ -148,7 +148,7 @@ Derived& MatrixBase<Derived>
coeffRef(i, j) = other.coeff(i, j); coeffRef(i, j) = other.coeff(i, j);
} }
} }
return *static_cast<Derived*>(this); return (*this).derived();
} }
} }

View File

@ -65,6 +65,7 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs>
* *
* \param Lhs the type of the left-hand side * \param Lhs the type of the left-hand side
* \param Rhs the type of the right-hand side * \param Rhs the type of the right-hand side
* \param EvalMode internal use only
* *
* This class represents an expression of the product of two matrices. * This class represents an expression of the product of two matrices.
* It is the return type of MatrixBase::lazyProduct(), which is used internally by * It is the return type of MatrixBase::lazyProduct(), which is used internally by
@ -72,8 +73,8 @@ struct ei_product_unroller<Index, 0, Lhs, Rhs>
* *
* \sa class Sum, class Difference * \sa class Sum, class Difference
*/ */
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs, int EvalMode>
struct ei_traits<Product<Lhs, Rhs> > struct ei_traits<Product<Lhs, Rhs, EvalMode> >
{ {
typedef typename Lhs::Scalar Scalar; typedef typename Lhs::Scalar Scalar;
enum { enum {
@ -84,8 +85,19 @@ struct ei_traits<Product<Lhs, Rhs> >
}; };
}; };
template<typename Lhs, typename Rhs> class Product : ei_no_assignment_operator, template<typename Lhs, typename Rhs>
public MatrixBase<Product<Lhs, Rhs> > struct ei_product_eval_mode
{
enum {
SizeAtCompileTime = MatrixBase<Product<Lhs,Rhs,UnrolledDotProduct> >::SizeAtCompileTime,
EvalMode = ( EIGEN_UNROLLED_LOOPS
&& SizeAtCompileTime != Dynamic
&& SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) ? UnrolledDotProduct : CacheOptimal,
};
};
template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignment_operator,
public MatrixBase<Product<Lhs, Rhs, EvalMode> >
{ {
public: public:
@ -97,6 +109,10 @@ template<typename Lhs, typename Rhs> class Product : ei_no_assignment_operator,
assert(lhs.cols() == rhs.rows()); assert(lhs.cols() == rhs.rows());
} }
/** \internal */
template<typename DestDerived>
void _cacheOptimalEval(DestDerived& res) const;
private: private:
int _rows() const { return m_lhs.rows(); } int _rows() const { return m_lhs.rows(); }
@ -156,7 +172,7 @@ template<typename OtherDerived>
const Eval<Product<Derived, OtherDerived> > const Eval<Product<Derived, OtherDerived> >
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{ {
return lazyProduct(other).eval(); return (*this).lazyProduct(other).eval();
} }
/** replaces \c *this by \c *this * \a other. /** replaces \c *this by \c *this * \a other.
@ -171,4 +187,39 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
return *this = *this * other; return *this = *this * other;
} }
/** \internal
 * Assignment from a Product expression whose evaluation mode is CacheOptimal.
 *
 * The product is asked to write its result into \c *this through
 * Product::_cacheOptimalEval(). No resizing is done here.
 *
 * \returns a reference to the derived object
 */
template<typename Derived>
template<typename Derived1, typename Derived2>
Derived& MatrixBase<Derived>::operator=(const Product<Derived1, Derived2, CacheOptimal>& product)
{
  MatrixBase& self = *this;
  product._cacheOptimalEval(self);
  return self.derived();
}
/** \internal
 * Evaluates the product lhs * rhs into \a res.
 *
 * \a res is first set to zero. Then, for each column k of the right-hand
 * side, column k of \a res accumulates the columns of the left-hand side
 * weighted by the coefficients of that rhs column. The lhs columns are
 * consumed four at a time; the columns left over when lhs.cols() is not a
 * multiple of 4 are handled one by one.
 *
 * \a res is not resized and no dimension check is performed here; it is
 * presumably expected to already be lhs.rows() x rhs.cols() (to be confirmed
 * against the callers).
 */
template<typename Lhs, typename Rhs, int EvalMode>
template<typename DestDerived>
void Product<Lhs,Rhs,EvalMode>::_cacheOptimalEval(DestDerived& res) const
{
  res.setZero();

  const int rowCount = m_lhs.rows();
  const int depth = m_lhs.cols();
  const int colCount = m_rhs.cols();
  // largest multiple of 4 that does not exceed depth
  const int unrolledDepth = depth & ~3;

  for (int col = 0; col < colCount; ++col)
  {
    int inner = 0;

    // main part: four columns of the lhs per pass
    while (inner < unrolledDepth)
    {
      const Scalar tmp0 = m_rhs.coeff(inner    , col);
      const Scalar tmp1 = m_rhs.coeff(inner + 1, col);
      const Scalar tmp2 = m_rhs.coeff(inner + 2, col);
      const Scalar tmp3 = m_rhs.coeff(inner + 3, col);
      for (int row = 0; row < rowCount; ++row)
      {
        res.coeffRef(row, col) += tmp0 * m_lhs.coeff(row, inner)
                                + tmp1 * m_lhs.coeff(row, inner + 1)
                                + tmp2 * m_lhs.coeff(row, inner + 2)
                                + tmp3 * m_lhs.coeff(row, inner + 3);
      }
      inner += 4;
    }

    // tail: the remaining (at most three) columns of the lhs
    while (inner < depth)
    {
      const Scalar tmp = m_rhs.coeff(inner, col);
      for (int row = 0; row < rowCount; ++row)
      {
        res.coeffRef(row, col) += tmp * m_lhs.coeff(row, inner);
      }
      ++inner;
    }
  }
}
#endif // EIGEN_PRODUCT_H #endif // EIGEN_PRODUCT_H

View File

@ -122,6 +122,8 @@ enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
enum DirectionType { Vertical, Horizontal }; enum DirectionType { Vertical, Horizontal };
/** Evaluation strategies for a matrix-matrix Product expression. */
enum ProductEvaluationMode {
  /** picked by ei_product_eval_mode when EIGEN_UNROLLED_LOOPS is set and the
    * product has a fixed size not exceeding EIGEN_UNROLLING_LIMIT */
  UnrolledDotProduct,
  /** picked by ei_product_eval_mode in every other case */
  CacheOptimal
};
// just a workaround because GCC seems to not really like empty structs // just a workaround because GCC seems to not really like empty structs
#ifdef __GNUG__ #ifdef __GNUG__
struct ei_empty_struct{char _ei_dummy_;}; struct ei_empty_struct{char _ei_dummy_;};
@ -156,20 +158,28 @@ template<> class ei_int_if_dynamic<Dynamic>
void setValue(int value) { m_value = value; } void setValue(int value) { m_value = value; }
}; };
struct ei_has_nothing {int a[1];};
struct ei_has_std_result_type {int a[2];}; template <bool Condition, class Then, class Else>
struct ei_has_tr1_result {int a[3];}; struct ei_meta_if { typedef Then ret; };
template <class Then, class Else>
struct ei_meta_if <false, Then, Else> { typedef Else ret; };
/** \internal /** \internal
* Convenient struct to get the result type of a unary or binary functor. * Convenient struct to get the result type of a unary or binary functor.
* *
* It supports both the current STL mechanism (using the result_type member) as well as * It supports both the current STL mechanism (using the result_type member) as well as
* upcoming next STL generation (using a templated result member). * upcoming next STL generation (using a templated result member).
* If none of these member is provided, then the type of the first argument is returned. * If none of these members is provided, then the type of the first argument is returned.
*/ */
template<typename T> struct ei_result_of {}; template<typename T> struct ei_result_of {};
template<typename Func, typename ArgType, int SizeOf=sizeof(ei_has_nothing)> struct ei_has_none {int a[1];};
struct ei_has_std_result_type {int a[2];};
struct ei_has_tr1_result {int a[3];};
template<typename Func, typename ArgType, int SizeOf=sizeof(ei_has_none)>
struct ei_unary_result_of_select {typedef ArgType type;}; struct ei_unary_result_of_select {typedef ArgType type;};
template<typename Func, typename ArgType> template<typename Func, typename ArgType>
@ -184,12 +194,12 @@ struct ei_result_of<Func(ArgType)> {
static ei_has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); static ei_has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
template<typename T> template<typename T>
static ei_has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0); static ei_has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
static ei_has_nothing testFunctor(...); static ei_has_none testFunctor(...);
typedef typename ei_unary_result_of_select<Func, ArgType, sizeof(testFunctor(static_cast<Func*>(0)))>::type type; typedef typename ei_unary_result_of_select<Func, ArgType, sizeof(testFunctor(static_cast<Func*>(0)))>::type type;
}; };
template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(ei_has_nothing)> template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(ei_has_none)>
struct ei_binary_result_of_select {typedef ArgType0 type;}; struct ei_binary_result_of_select {typedef ArgType0 type;};
template<typename Func, typename ArgType0, typename ArgType1> template<typename Func, typename ArgType0, typename ArgType1>
@ -206,7 +216,7 @@ struct ei_result_of<Func(ArgType0,ArgType1)> {
static ei_has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); static ei_has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
template<typename T> template<typename T>
static ei_has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0); static ei_has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
static ei_has_nothing testFunctor(...); static ei_has_none testFunctor(...);
typedef typename ei_binary_result_of_select<Func, ArgType0, ArgType1, sizeof(testFunctor(static_cast<Func*>(0)))>::type type; typedef typename ei_binary_result_of_select<Func, ArgType0, ArgType1, sizeof(testFunctor(static_cast<Func*>(0)))>::type type;
}; };