* move some compile-time "if"s into their respective unrollers (assign and dot)

* fix a couple of compilation issues when unrolling is disabled
* reduce default unrolling limit to a more reasonable value
This commit is contained in:
Gael Guennebaud 2008-06-07 01:07:48 +00:00
parent a172385720
commit 6998037930
7 changed files with 145 additions and 133 deletions

View File

@ -58,10 +58,30 @@ struct ei_matrix_assignment_unroller<Derived1, Derived2, 0>
inline static void run(Derived1 &, const Derived2 &) {}
};
// Dynamic col-major
template<typename Derived1, typename Derived2>
struct ei_matrix_assignment_unroller<Derived1, Derived2, Dynamic>
struct ei_matrix_assignment_unroller<Derived1, Derived2, -1>
{
inline static void run(Derived1 &, const Derived2 &) {}
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
};
// Dynamic row-major: run-time loop version, selected by the tag value -2.
template<typename Derived1, typename Derived2>
struct ei_matrix_assignment_unroller<Derived1, Derived2, -2>
{
  /** Copies every coefficient of \a src into \a dst, one row at a time.
    *
    * The loop bounds are taken from \a dst; \a src is read at the same
    * (row, column) indices.
    */
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    // The row index drives the outer loop so that the inner loop walks the
    // columns, which gives the compiler a chance to unroll that inner loop.
    for(int row = 0; row < dst.rows(); ++row)
    {
      for(int col = 0; col < dst.cols(); ++col)
      {
        dst.coeffRef(row, col) = src.coeff(row, col);
      }
    }
  }
};
//----
@ -105,8 +125,10 @@ bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableB
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit))
&& ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit)
|| ((int(Derived::Flags) & RowMajorBit)
? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
: int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
? int(Derived::ColsAtCompileTime)!=Dynamic
&& (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
: int(Derived::RowsAtCompileTime)!=Dynamic
&& (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT>
struct ei_assignment_impl;
@ -156,39 +178,21 @@ inline Derived& MatrixBase<Derived>
//----
template <typename Derived, typename OtherDerived>
struct ei_assignment_impl<Derived, OtherDerived, false, true> // no vec + unrolling
// no vectorization
template <typename Derived, typename OtherDerived, bool Unroll>
struct ei_assignment_impl<Derived, OtherDerived, false, Unroll>
{
static void run(Derived & dst, const OtherDerived & src)
{
ei_matrix_assignment_unroller
<Derived, OtherDerived, int(Derived::SizeAtCompileTime)
<Derived, OtherDerived,
Unroll ? int(Derived::SizeAtCompileTime)
: Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic ? -1 // col-major
: -2 // row-major
>::run(dst.derived(), src.derived());
}
};
// Assignment without vectorization and without meta-unrolling: plain
// run-time loops, with the traversal order chosen from compile-time sizes.
template <typename Derived, typename OtherDerived>
struct ei_assignment_impl<Derived, OtherDerived, false, false>
{
  /** Copies every coefficient of \a src into \a dst.
    *
    * The loop bounds are taken from \a dst. Columns are traversed in the outer
    * loop when the column count is Dynamic or the row count is not Dynamic;
    * otherwise rows are traversed in the outer loop.
    */
  static void run(Derived & dst, const OtherDerived & src)
  {
    const bool columnOuterLoop =
      (Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic);

    if(!columnOuterLoop)
    {
      // Row-major traversal: the inner loop runs over the columns,
      // which allows the compiler to unroll that inner loop.
      for(int row = 0; row < dst.rows(); ++row)
      {
        for(int col = 0; col < dst.cols(); ++col)
          dst.coeffRef(row, col) = src.coeff(row, col);
      }
      return;
    }

    // Column-major traversal.
    for(int col = 0; col < dst.cols(); ++col)
    {
      for(int row = 0; row < dst.rows(); ++row)
        dst.coeffRef(row, col) = src.coeff(row, col);
    }
  }
};
//----
template <typename Derived, typename OtherDerived>
@ -224,7 +228,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true, false> // vec + no-unroll
};
template <typename Derived, typename OtherDerived>
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array like
{
static void run(Derived & dst, const OtherDerived & src)
{

View File

@ -26,17 +26,17 @@
#define EIGEN_DOT_H
template<int Index, int Size, typename Derived1, typename Derived2>
struct ei_dot_unroller
struct ei_dot_impl
{
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
{
ei_dot_unroller<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot);
ei_dot_impl<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot);
dot += v1.coeff(Index) * ei_conj(v2.coeff(Index));
}
};
template<int Size, typename Derived1, typename Derived2>
struct ei_dot_unroller<0, Size, Derived1, Derived2>
struct ei_dot_impl<0, Size, Derived1, Derived2>
{
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
{
@ -44,15 +44,20 @@ struct ei_dot_unroller<0, Size, Derived1, Derived2>
}
};
template<int Index, typename Derived1, typename Derived2>
struct ei_dot_unroller<Index, Dynamic, Derived1, Derived2>
template<typename Derived1, typename Derived2>
struct ei_dot_impl<Dynamic, Dynamic, Derived1, Derived2>
{
inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {}
inline static void run(const Derived1& v1, const Derived2& v2, typename Derived1::Scalar& dot)
{
dot = v1.coeff(0) * ei_conj(v2.coeff(0));
for(int i = 1; i < v1.size(); i++)
dot += v1.coeff(i)* ei_conj(v2.coeff(i));
}
};
// prevent buggy user code from causing an infinite recursion
template<int Index, typename Derived1, typename Derived2>
struct ei_dot_unroller<Index, 0, Derived1, Derived2>
struct ei_dot_impl<Index, 0, Derived1, Derived2>
{
inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {}
};
@ -83,22 +88,16 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested);
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested);
ei_assert(nested.size() == otherNested.size());
Scalar res;
const bool unroll = SizeAtCompileTime
* (_Nested::CoeffReadCost + _OtherNested::CoeffReadCost + NumTraits<Scalar>::MulCost)
+ (int(SizeAtCompileTime) - 1) * NumTraits<Scalar>::AddCost
<= EIGEN_UNROLLING_LIMIT;
if(unroll)
ei_dot_unroller<int(SizeAtCompileTime)-1,
Scalar res;
ei_dot_impl<unroll ? int(SizeAtCompileTime)-1 : Dynamic,
unroll ? int(SizeAtCompileTime) : Dynamic,
_Nested, _OtherNested>
::run(nested, otherNested, res);
else
{
res = nested.coeff(0) * ei_conj(otherNested.coeff(0));
for(int i = 1; i < size(); i++)
res += nested.coeff(i)* ei_conj(otherNested.coeff(i));
}
return res;
}

View File

@ -92,7 +92,7 @@ inline void Part<MatrixType, Mode>::operator=(const Other& other)
}
template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount>
struct ei_part_assignment_unroller
struct ei_part_assignment_impl
{
enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
@ -101,7 +101,7 @@ struct ei_part_assignment_unroller
inline static void run(Derived1 &dst, const Derived2 &src)
{
ei_part_assignment_unroller<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src);
ei_part_assignment_impl<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src);
if(Mode == SelfAdjoint)
{
@ -122,7 +122,7 @@ struct ei_part_assignment_unroller
};
template<typename Derived1, typename Derived2, unsigned int Mode>
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1>
struct ei_part_assignment_impl<Derived1, Derived2, Mode, 1>
{
inline static void run(Derived1 &dst, const Derived2 &src)
{
@ -133,17 +133,66 @@ struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1>
// prevent buggy user code from causing an infinite recursion
template<typename Derived1, typename Derived2, unsigned int Mode>
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 0>
struct ei_part_assignment_impl<Derived1, Derived2, Mode, 0>
{
inline static void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, unsigned int Mode>
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, Dynamic>
template<typename Derived1, typename Derived2>
struct ei_part_assignment_impl<Derived1, Derived2, Upper, Dynamic>
{
inline static void run(Derived1 &, const Derived2 &) {}
inline static void run(Derived1 &dst, const Derived2 &src)
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i <= j; i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
};
// Run-time loop version (Dynamic unroll count) for the Lower part.
template<typename Derived1, typename Derived2>
struct ei_part_assignment_impl<Derived1, Derived2, Lower, Dynamic>
{
  /** Copies from \a src to \a dst the coefficients whose row index is greater
    * than or equal to their column index (diagonal included), column by column.
    * Coefficients of \a dst above the diagonal are not written.
    */
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(int col = 0; col < dst.cols(); ++col)
    {
      // start on the diagonal entry of this column and go down
      for(int row = col; row < dst.rows(); ++row)
      {
        dst.coeffRef(row, col) = src.coeff(row, col);
      }
    }
  }
};
// Run-time loop version (Dynamic unroll count) for the StrictlyUpper part.
template<typename Derived1, typename Derived2>
struct ei_part_assignment_impl<Derived1, Derived2, StrictlyUpper, Dynamic>
{
  /** Copies from \a src to \a dst the coefficients whose row index is strictly
    * smaller than their column index, column by column.
    * The diagonal and everything below it in \a dst are not written.
    */
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(int col = 0; col < dst.cols(); ++col)
    {
      // stop just before the diagonal entry of this column
      for(int row = 0; row < col; ++row)
      {
        dst.coeffRef(row, col) = src.coeff(row, col);
      }
    }
  }
};
// Run-time loop version (Dynamic unroll count) for the StrictlyLower part.
template<typename Derived1, typename Derived2>
struct ei_part_assignment_impl<Derived1, Derived2, StrictlyLower, Dynamic>
{
  /** Copies from \a src to \a dst the coefficients whose row index is strictly
    * greater than their column index, column by column.
    * The diagonal and everything above it in \a dst are not written.
    */
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(int col = 0; col < dst.cols(); ++col)
    {
      // start one row below the diagonal entry of this column
      for(int row = col+1; row < dst.rows(); ++row)
      {
        dst.coeffRef(row, col) = src.coeff(row, col);
      }
    }
  }
};
// Run-time loop version (Dynamic unroll count) for the SelfAdjoint mode.
template<typename Derived1, typename Derived2>
struct ei_part_assignment_impl<Derived1, Derived2, SelfAdjoint, Dynamic>
{
  /** Fills \a dst from the strictly upper part and the diagonal of \a src.
    *
    * For each column, every coefficient above the diagonal is copied as is and
    * its conjugate is written to the mirrored position below the diagonal.
    * The diagonal coefficient is set to the real part of the one in \a src.
    * Coefficients of \a src below the diagonal are never read.
    */
  inline static void run(Derived1 &dst, const Derived2 &src)
  {
    for(int col = 0; col < dst.cols(); ++col)
    {
      for(int row = 0; row < col; ++row)
      {
        // write the upper entry, then mirror its conjugate to (col, row);
        // kept as a single chained statement on purpose
        dst.coeffRef(col, row) = ei_conj(dst.coeffRef(row, col) = src.coeff(row, col));
      }
      // diagonal entry: only the real part is kept
      dst.coeffRef(col, col) = ei_real(src.coeff(col, col));
    }
  }
};
template<typename MatrixType, unsigned int Mode>
template<typename Other>
@ -151,48 +200,12 @@ void Part<MatrixType, Mode>::lazyAssign(const Other& other)
{
const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT;
ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
if(unroll)
{
ei_part_assignment_unroller
ei_part_assignment_impl
<MatrixType, Other, Mode,
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic
>::run(m_matrix, other.derived());
}
else
{
switch(Mode)
{
case Upper:
for(int j = 0; j < m_matrix.cols(); j++)
for(int i = 0; i <= j; i++)
m_matrix.coeffRef(i, j) = other.coeff(i, j);
break;
case Lower:
for(int j = 0; j < m_matrix.cols(); j++)
for(int i = j; i < m_matrix.rows(); i++)
m_matrix.coeffRef(i, j) = other.coeff(i, j);
break;
case StrictlyUpper:
for(int j = 0; j < m_matrix.cols(); j++)
for(int i = 0; i < j; i++)
m_matrix.coeffRef(i, j) = other.coeff(i, j);
break;
case StrictlyLower:
for(int j = 0; j < m_matrix.cols(); j++)
for(int i = j+1; i < m_matrix.rows(); i++)
m_matrix.coeffRef(i, j) = other.coeff(i, j);
break;
case SelfAdjoint:
for(int j = 0; j < m_matrix.cols(); j++)
{
for(int i = 0; i < j; i++)
m_matrix.coeffRef(j, i) = ei_conj(m_matrix.coeffRef(i, j) = other.coeff(i, j));
m_matrix.coeffRef(j, j) = ei_real(other.coeff(j, j));
}
break;
}
}
}
template<typename MatrixType, unsigned int Mode>
template<typename Other> inline void Part<MatrixType, Mode>::operator+=(const Other& other)

View File

@ -47,8 +47,8 @@ struct ei_product_impl<0, Size, Lhs, Rhs>
}
};
template<int Index, typename Lhs, typename Rhs>
struct ei_product_impl<Index, Dynamic, Lhs, Rhs>
template<typename Lhs, typename Rhs>
struct ei_product_impl<Dynamic, Dynamic, Lhs, Rhs>
{
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res)
{
@ -268,7 +268,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
{
Scalar res;
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
ei_product_impl<Lhs::ColsAtCompileTime-1,
ei_product_impl<unroll ? Lhs::ColsAtCompileTime-1 : Dynamic,
unroll ? Lhs::ColsAtCompileTime : Dynamic,
_LhsNested, _RhsNested>
::run(row, col, m_lhs, m_rhs, res);

View File

@ -63,7 +63,17 @@ template<typename BinaryOp, typename Derived, int Start>
struct ei_redux_unroller<BinaryOp, Derived, Start, Dynamic>
{
typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
static Scalar run(const Derived&, const BinaryOp&) { return Scalar(); }
static Scalar run(const Derived& mat, const BinaryOp& func)
{
Scalar res;
res = mat.coeff(0,0);
for(int i = 1; i < mat.rows(); i++)
res = func(res, mat.coeff(i, 0));
for(int j = 1; j < mat.cols(); j++)
for(int i = 0; i < mat.rows(); i++)
res = func(res, mat.coeff(i, j));
return res;
}
};
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
@ -81,21 +91,9 @@ MatrixBase<Derived>::redux(const BinaryOp& func) const
const bool unroll = SizeAtCompileTime * CoeffReadCost
+ (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost
<= EIGEN_UNROLLING_LIMIT;
if(unroll)
return ei_redux_unroller<BinaryOp, Derived, 0,
unroll ? int(SizeAtCompileTime) : Dynamic>
::run(derived(), func);
else
{
Scalar res;
res = coeff(0,0);
for(int i = 1; i < rows(); i++)
res = func(res, coeff(i, 0));
for(int j = 1; j < cols(); j++)
for(int i = 0; i < rows(); i++)
res = func(res, coeff(i, j));
return res;
}
}
/** \returns the sum of all coefficients of *this

View File

@ -52,7 +52,15 @@ struct ei_visitor_unroller<Visitor, Derived, 1>
template<typename Visitor, typename Derived>
struct ei_visitor_unroller<Visitor, Derived, Dynamic>
{
inline static void run(const Derived &, Visitor&) {}
inline static void run(const Derived& mat, Visitor& visitor)
{
visitor.init(mat.coeff(0,0), 0, 0);
for(int i = 1; i < mat.rows(); i++)
visitor(mat.coeff(i, 0), i, 0);
for(int j = 1; j < mat.cols(); j++)
for(int i = 0; i < mat.rows(); i++)
visitor(mat.coeff(i, j), i, j);
}
};
@ -77,19 +85,9 @@ void MatrixBase<Derived>::visit(Visitor& visitor) const
const bool unroll = SizeAtCompileTime * CoeffReadCost
+ (SizeAtCompileTime-1) * ei_functor_traits<Visitor>::Cost
<= EIGEN_UNROLLING_LIMIT;
if(unroll)
return ei_visitor_unroller<Visitor, Derived,
unroll ? int(SizeAtCompileTime) : Dynamic
>::run(derived(), visitor);
else
{
visitor.init(coeff(0,0), 0, 0);
for(int i = 1; i < rows(); i++)
visitor(coeff(i, 0), i, 0);
for(int j = 1; j < cols(); j++)
for(int i = 0; i < rows(); i++)
visitor(coeff(i, j), i, j);
}
}
/** \internal

View File

@ -34,7 +34,7 @@
/** Defines the maximal loop size to enable meta unrolling of loops */
#ifndef EIGEN_UNROLLING_LIMIT
#define EIGEN_UNROLLING_LIMIT 400
#define EIGEN_UNROLLING_LIMIT 100
#endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR