* move some compile-time "if" branches into their respective unrollers (assign and dot)

* fix a couple of compilation issues when unrolling is disabled
* reduce default unrolling limit to a more reasonable value
This commit is contained in:
Gael Guennebaud 2008-06-07 01:07:48 +00:00
parent a172385720
commit 6998037930
7 changed files with 145 additions and 133 deletions

View File

@ -58,10 +58,30 @@ struct ei_matrix_assignment_unroller<Derived1, Derived2, 0>
inline static void run(Derived1 &, const Derived2 &) {} inline static void run(Derived1 &, const Derived2 &) {}
}; };
// Dynamic col-major
template<typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_matrix_assignment_unroller<Derived1, Derived2, Dynamic> struct ei_matrix_assignment_unroller<Derived1, Derived2, -1>
{ {
inline static void run(Derived1 &, const Derived2 &) {} inline static void run(Derived1 &dst, const Derived2 &src)
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
};
// Dynamic row-major
template<typename Derived1, typename Derived2>
struct ei_matrix_assignment_unroller<Derived1, Derived2, -2>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
// traverse in row-major order
// in order to allow the compiler to unroll the inner loop
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
}; };
//---- //----
@ -103,10 +123,12 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic>
template <typename Derived, typename OtherDerived, template <typename Derived, typename OtherDerived,
bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit) bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit)
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)) && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit))
&& ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit) && ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit)
||((int(Derived::Flags)&RowMajorBit) || ((int(Derived::Flags) & RowMajorBit)
? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0) ? int(Derived::ColsAtCompileTime)!=Dynamic
: int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ), && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
: int(Derived::RowsAtCompileTime)!=Dynamic
&& (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT> bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT>
struct ei_assignment_impl; struct ei_assignment_impl;
@ -156,39 +178,21 @@ inline Derived& MatrixBase<Derived>
//---- //----
template <typename Derived, typename OtherDerived> // no vectorization
struct ei_assignment_impl<Derived, OtherDerived, false, true> // no vec + unrolling template <typename Derived, typename OtherDerived, bool Unroll>
struct ei_assignment_impl<Derived, OtherDerived, false, Unroll>
{ {
static void run(Derived & dst, const OtherDerived & src) static void run(Derived & dst, const OtherDerived & src)
{ {
ei_matrix_assignment_unroller ei_matrix_assignment_unroller
<Derived, OtherDerived, int(Derived::SizeAtCompileTime) <Derived, OtherDerived,
Unroll ? int(Derived::SizeAtCompileTime)
: Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic ? -1 // col-major
: -2 // row-major
>::run(dst.derived(), src.derived()); >::run(dst.derived(), src.derived());
} }
}; };
// Assignment implementation for the <Vectorize = false, Unroll = false> case:
// copies src into dst one coefficient at a time with plain nested loops.
// The loop nesting order is picked by a condition made only of compile-time
// constants (RowsAtCompileTime / ColsAtCompileTime compared against Dynamic).
template <typename Derived, typename OtherDerived>
struct ei_assignment_impl<Derived, OtherDerived, false, false> // no vec + no unrolling + col major order
{
// dst: destination expression, written through coeffRef(i, j).
// src: source expression, read through coeff(i, j).
// The loop bounds come from dst.rows() / dst.cols() only.
static void run(Derived & dst, const OtherDerived & src)
{
// Column-by-column traversal (outer loop over j) unless the column count
// is fixed at compile time while the row count is Dynamic.
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
else
{
// Reached only when ColsAtCompileTime != Dynamic and
// RowsAtCompileTime == Dynamic.
// traverse in row-major order
// in order to allow the compiler to unroll the inner loop
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
}
};
//---- //----
template <typename Derived, typename OtherDerived> template <typename Derived, typename OtherDerived>
@ -224,7 +228,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true, false> // vec + no-unroll
}; };
template <typename Derived, typename OtherDerived> template <typename Derived, typename OtherDerived>
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array like
{ {
static void run(Derived & dst, const OtherDerived & src) static void run(Derived & dst, const OtherDerived & src)
{ {

View File

@ -26,17 +26,17 @@
#define EIGEN_DOT_H #define EIGEN_DOT_H
template<int Index, int Size, typename Derived1, typename Derived2> template<int Index, int Size, typename Derived1, typename Derived2>
struct ei_dot_unroller struct ei_dot_impl
{ {
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot) inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
{ {
ei_dot_unroller<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot); ei_dot_impl<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot);
dot += v1.coeff(Index) * ei_conj(v2.coeff(Index)); dot += v1.coeff(Index) * ei_conj(v2.coeff(Index));
} }
}; };
template<int Size, typename Derived1, typename Derived2> template<int Size, typename Derived1, typename Derived2>
struct ei_dot_unroller<0, Size, Derived1, Derived2> struct ei_dot_impl<0, Size, Derived1, Derived2>
{ {
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot) inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
{ {
@ -44,15 +44,20 @@ struct ei_dot_unroller<0, Size, Derived1, Derived2>
} }
}; };
template<int Index, typename Derived1, typename Derived2> template<typename Derived1, typename Derived2>
struct ei_dot_unroller<Index, Dynamic, Derived1, Derived2> struct ei_dot_impl<Dynamic, Dynamic, Derived1, Derived2>
{ {
inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {} inline static void run(const Derived1& v1, const Derived2& v2, typename Derived1::Scalar& dot)
{
dot = v1.coeff(0) * ei_conj(v2.coeff(0));
for(int i = 1; i < v1.size(); i++)
dot += v1.coeff(i)* ei_conj(v2.coeff(i));
}
}; };
// prevent buggy user code from causing an infinite recursion // prevent buggy user code from causing an infinite recursion
template<int Index, typename Derived1, typename Derived2> template<int Index, typename Derived1, typename Derived2>
struct ei_dot_unroller<Index, 0, Derived1, Derived2> struct ei_dot_impl<Index, 0, Derived1, Derived2>
{ {
inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {} inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {}
}; };
@ -83,22 +88,16 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested); EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested);
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested); EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested);
ei_assert(nested.size() == otherNested.size()); ei_assert(nested.size() == otherNested.size());
Scalar res;
const bool unroll = SizeAtCompileTime const bool unroll = SizeAtCompileTime
* (_Nested::CoeffReadCost + _OtherNested::CoeffReadCost + NumTraits<Scalar>::MulCost) * (_Nested::CoeffReadCost + _OtherNested::CoeffReadCost + NumTraits<Scalar>::MulCost)
+ (int(SizeAtCompileTime) - 1) * NumTraits<Scalar>::AddCost + (int(SizeAtCompileTime) - 1) * NumTraits<Scalar>::AddCost
<= EIGEN_UNROLLING_LIMIT; <= EIGEN_UNROLLING_LIMIT;
if(unroll)
ei_dot_unroller<int(SizeAtCompileTime)-1, Scalar res;
unroll ? int(SizeAtCompileTime) : Dynamic, ei_dot_impl<unroll ? int(SizeAtCompileTime)-1 : Dynamic,
_Nested, _OtherNested> unroll ? int(SizeAtCompileTime) : Dynamic,
::run(nested, otherNested, res); _Nested, _OtherNested>
else ::run(nested, otherNested, res);
{
res = nested.coeff(0) * ei_conj(otherNested.coeff(0));
for(int i = 1; i < size(); i++)
res += nested.coeff(i)* ei_conj(otherNested.coeff(i));
}
return res; return res;
} }

View File

@ -92,7 +92,7 @@ inline void Part<MatrixType, Mode>::operator=(const Other& other)
} }
template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount> template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount>
struct ei_part_assignment_unroller struct ei_part_assignment_impl
{ {
enum { enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime, col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
@ -101,7 +101,7 @@ struct ei_part_assignment_unroller
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
{ {
ei_part_assignment_unroller<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src); ei_part_assignment_impl<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src);
if(Mode == SelfAdjoint) if(Mode == SelfAdjoint)
{ {
@ -122,7 +122,7 @@ struct ei_part_assignment_unroller
}; };
template<typename Derived1, typename Derived2, unsigned int Mode> template<typename Derived1, typename Derived2, unsigned int Mode>
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1> struct ei_part_assignment_impl<Derived1, Derived2, Mode, 1>
{ {
inline static void run(Derived1 &dst, const Derived2 &src) inline static void run(Derived1 &dst, const Derived2 &src)
{ {
@ -133,17 +133,66 @@ struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1>
// prevent buggy user code from causing an infinite recursion // prevent buggy user code from causing an infinite recursion
template<typename Derived1, typename Derived2, unsigned int Mode> template<typename Derived1, typename Derived2, unsigned int Mode>
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 0> struct ei_part_assignment_impl<Derived1, Derived2, Mode, 0>
{ {
inline static void run(Derived1 &, const Derived2 &) {} inline static void run(Derived1 &, const Derived2 &) {}
}; };
template<typename Derived1, typename Derived2, unsigned int Mode> template<typename Derived1, typename Derived2>
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, Dynamic> struct ei_part_assignment_impl<Derived1, Derived2, Upper, Dynamic>
{ {
inline static void run(Derived1 &, const Derived2 &) {} inline static void run(Derived1 &dst, const Derived2 &src)
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i <= j; i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
}; };
// Specialization for Mode == Lower with a Dynamic unroll count: a run-time
// loop instead of a compile-time unrolled recursion.
// For each column j it copies src into dst from the diagonal entry (i == j)
// down to the last row; entries above the diagonal are never written.
template<typename Derived1, typename Derived2>
struct ei_part_assignment_impl<Derived1, Derived2, Lower, Dynamic>
{
// dst is written through coeffRef(i, j), src is read through coeff(i, j);
// loop bounds are taken from dst.cols() and dst.rows().
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(int j = 0; j < dst.cols(); j++)
for(int i = j; i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
};
// Specialization for Mode == StrictlyUpper with a Dynamic unroll count: a
// run-time loop instead of a compile-time unrolled recursion.
// For each column j it copies the entries with row index i < j, i.e. those
// strictly above the diagonal; the diagonal and everything below it are
// never written.
template<typename Derived1, typename Derived2>
struct ei_part_assignment_impl<Derived1, Derived2, StrictlyUpper, Dynamic>
{
// dst is written through coeffRef(i, j), src is read through coeff(i, j);
// the outer bound is dst.cols(), the inner bound is the column index j.
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < j; i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
};
// Specialization for Mode == StrictlyLower with a Dynamic unroll count: a
// run-time loop instead of a compile-time unrolled recursion.
// For each column j it copies the entries with row index i > j, i.e. those
// strictly below the diagonal; the diagonal and everything above it are
// never written.
template<typename Derived1, typename Derived2>
struct ei_part_assignment_impl<Derived1, Derived2, StrictlyLower, Dynamic>
{
// dst is written through coeffRef(i, j), src is read through coeff(i, j);
// the inner loop starts one row below the diagonal (j+1) and stops at
// dst.rows().
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(int j = 0; j < dst.cols(); j++)
for(int i = j+1; i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
};
// Specialization for Mode == SelfAdjoint with a Dynamic unroll count: a
// run-time loop instead of a compile-time unrolled recursion.
// Only the entries of src on or above the diagonal are read; the entries of
// dst below the diagonal are derived from them.
template<typename Derived1, typename Derived2>
struct ei_part_assignment_impl<Derived1, Derived2, SelfAdjoint, Dynamic>
{
// dst is written through coeffRef, src is read through coeff; the only loop
// bound taken from dst is dst.cols().
// NOTE(review): row index j is used up to dst.cols()-1, so this presumably
// expects a square dst - confirm against the callers.
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(int j = 0; j < dst.cols(); j++)
{
// Above the diagonal: copy src(i, j) into dst(i, j), then store
// ei_conj of that value (presumably its complex conjugate) in the
// mirrored entry dst(j, i).
for(int i = 0; i < j; i++)
dst.coeffRef(j, i) = ei_conj(dst.coeffRef(i, j) = src.coeff(i, j));
// Diagonal: keep only ei_real of the source entry (presumably its real
// part).
dst.coeffRef(j, j) = ei_real(src.coeff(j, j));
}
}
};
template<typename MatrixType, unsigned int Mode> template<typename MatrixType, unsigned int Mode>
template<typename Other> template<typename Other>
@ -151,47 +200,11 @@ void Part<MatrixType, Mode>::lazyAssign(const Other& other)
{ {
const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT; const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT;
ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols()); ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
if(unroll)
{ ei_part_assignment_impl
ei_part_assignment_unroller <MatrixType, Other, Mode,
<MatrixType, Other, Mode, unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic >::run(m_matrix, other.derived());
>::run(m_matrix, other.derived());
}
else
{
switch(Mode)
{
case Upper:
for(int j = 0; j < m_matrix.cols(); j++)
for(int i = 0; i <= j; i++)
m_matrix.coeffRef(i, j) = other.coeff(i, j);
break;
case Lower:
for(int j = 0; j < m_matrix.cols(); j++)
for(int i = j; i < m_matrix.rows(); i++)
m_matrix.coeffRef(i, j) = other.coeff(i, j);
break;
case StrictlyUpper:
for(int j = 0; j < m_matrix.cols(); j++)
for(int i = 0; i < j; i++)
m_matrix.coeffRef(i, j) = other.coeff(i, j);
break;
case StrictlyLower:
for(int j = 0; j < m_matrix.cols(); j++)
for(int i = j+1; i < m_matrix.rows(); i++)
m_matrix.coeffRef(i, j) = other.coeff(i, j);
break;
case SelfAdjoint:
for(int j = 0; j < m_matrix.cols(); j++)
{
for(int i = 0; i < j; i++)
m_matrix.coeffRef(j, i) = ei_conj(m_matrix.coeffRef(i, j) = other.coeff(i, j));
m_matrix.coeffRef(j, j) = ei_real(other.coeff(j, j));
}
break;
}
}
} }
template<typename MatrixType, unsigned int Mode> template<typename MatrixType, unsigned int Mode>

View File

@ -47,8 +47,8 @@ struct ei_product_impl<0, Size, Lhs, Rhs>
} }
}; };
template<int Index, typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
struct ei_product_impl<Index, Dynamic, Lhs, Rhs> struct ei_product_impl<Dynamic, Dynamic, Lhs, Rhs>
{ {
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res) inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res)
{ {
@ -268,7 +268,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
{ {
Scalar res; Scalar res;
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
ei_product_impl<Lhs::ColsAtCompileTime-1, ei_product_impl<unroll ? Lhs::ColsAtCompileTime-1 : Dynamic,
unroll ? Lhs::ColsAtCompileTime : Dynamic, unroll ? Lhs::ColsAtCompileTime : Dynamic,
_LhsNested, _RhsNested> _LhsNested, _RhsNested>
::run(row, col, m_lhs, m_rhs, res); ::run(row, col, m_lhs, m_rhs, res);

View File

@ -63,7 +63,17 @@ template<typename BinaryOp, typename Derived, int Start>
struct ei_redux_unroller<BinaryOp, Derived, Start, Dynamic> struct ei_redux_unroller<BinaryOp, Derived, Start, Dynamic>
{ {
typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar; typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
static Scalar run(const Derived&, const BinaryOp&) { return Scalar(); } static Scalar run(const Derived& mat, const BinaryOp& func)
{
Scalar res;
res = mat.coeff(0,0);
for(int i = 1; i < mat.rows(); i++)
res = func(res, mat.coeff(i, 0));
for(int j = 1; j < mat.cols(); j++)
for(int i = 0; i < mat.rows(); i++)
res = func(res, mat.coeff(i, j));
return res;
}
}; };
/** \returns the result of a full redux operation on the whole matrix or vector using \a func /** \returns the result of a full redux operation on the whole matrix or vector using \a func
@ -81,21 +91,9 @@ MatrixBase<Derived>::redux(const BinaryOp& func) const
const bool unroll = SizeAtCompileTime * CoeffReadCost const bool unroll = SizeAtCompileTime * CoeffReadCost
+ (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost + (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost
<= EIGEN_UNROLLING_LIMIT; <= EIGEN_UNROLLING_LIMIT;
if(unroll) return ei_redux_unroller<BinaryOp, Derived, 0,
return ei_redux_unroller<BinaryOp, Derived, 0, unroll ? int(SizeAtCompileTime) : Dynamic>
unroll ? int(SizeAtCompileTime) : Dynamic> ::run(derived(), func);
::run(derived(), func);
else
{
Scalar res;
res = coeff(0,0);
for(int i = 1; i < rows(); i++)
res = func(res, coeff(i, 0));
for(int j = 1; j < cols(); j++)
for(int i = 0; i < rows(); i++)
res = func(res, coeff(i, j));
return res;
}
} }
/** \returns the sum of all coefficients of *this /** \returns the sum of all coefficients of *this

View File

@ -52,7 +52,15 @@ struct ei_visitor_unroller<Visitor, Derived, 1>
template<typename Visitor, typename Derived> template<typename Visitor, typename Derived>
struct ei_visitor_unroller<Visitor, Derived, Dynamic> struct ei_visitor_unroller<Visitor, Derived, Dynamic>
{ {
inline static void run(const Derived &, Visitor&) {} inline static void run(const Derived& mat, Visitor& visitor)
{
visitor.init(mat.coeff(0,0), 0, 0);
for(int i = 1; i < mat.rows(); i++)
visitor(mat.coeff(i, 0), i, 0);
for(int j = 1; j < mat.cols(); j++)
for(int i = 0; i < mat.rows(); i++)
visitor(mat.coeff(i, j), i, j);
}
}; };
@ -77,19 +85,9 @@ void MatrixBase<Derived>::visit(Visitor& visitor) const
const bool unroll = SizeAtCompileTime * CoeffReadCost const bool unroll = SizeAtCompileTime * CoeffReadCost
+ (SizeAtCompileTime-1) * ei_functor_traits<Visitor>::Cost + (SizeAtCompileTime-1) * ei_functor_traits<Visitor>::Cost
<= EIGEN_UNROLLING_LIMIT; <= EIGEN_UNROLLING_LIMIT;
if(unroll) return ei_visitor_unroller<Visitor, Derived,
return ei_visitor_unroller<Visitor, Derived, unroll ? int(SizeAtCompileTime) : Dynamic
unroll ? int(SizeAtCompileTime) : Dynamic >::run(derived(), visitor);
>::run(derived(), visitor);
else
{
visitor.init(coeff(0,0), 0, 0);
for(int i = 1; i < rows(); i++)
visitor(coeff(i, 0), i, 0);
for(int j = 1; j < cols(); j++)
for(int i = 0; i < rows(); i++)
visitor(coeff(i, j), i, j);
}
} }
/** \internal /** \internal

View File

@ -34,7 +34,7 @@
/** Defines the maximal loop size to enable meta unrolling of loops */ /** Defines the maximal loop size to enable meta unrolling of loops */
#ifndef EIGEN_UNROLLING_LIMIT #ifndef EIGEN_UNROLLING_LIMIT
#define EIGEN_UNROLLING_LIMIT 400 #define EIGEN_UNROLLING_LIMIT 100
#endif #endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR