mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-18 12:23:13 +08:00
* move some compile-time "if"s to their respective unrollers (assign and dot)
* fix a couple of compilation issues when unrolling is disabled
* reduce default unrolling limit to a more reasonable value
This commit is contained in:
parent
a172385720
commit
6998037930
@ -58,10 +58,30 @@ struct ei_matrix_assignment_unroller<Derived1, Derived2, 0>
|
||||
inline static void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
// Dynamic col-major
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_matrix_assignment_unroller<Derived1, Derived2, Dynamic>
|
||||
struct ei_matrix_assignment_unroller<Derived1, Derived2, -1>
|
||||
{
|
||||
inline static void run(Derived1 &, const Derived2 &) {}
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
};
|
||||
|
||||
// Dynamic row-major
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_matrix_assignment_unroller<Derived1, Derived2, -2>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
// traverse in row-major order
|
||||
// in order to allow the compiler to unroll the inner loop
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
};
|
||||
|
||||
//----
|
||||
@ -105,8 +125,10 @@ bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableB
|
||||
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit))
|
||||
&& ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit)
|
||||
|| ((int(Derived::Flags) & RowMajorBit)
|
||||
? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
|
||||
: int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
|
||||
? int(Derived::ColsAtCompileTime)!=Dynamic
|
||||
&& (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
|
||||
: int(Derived::RowsAtCompileTime)!=Dynamic
|
||||
&& (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
|
||||
bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT>
|
||||
struct ei_assignment_impl;
|
||||
|
||||
@ -156,39 +178,21 @@ inline Derived& MatrixBase<Derived>
|
||||
|
||||
//----
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
struct ei_assignment_impl<Derived, OtherDerived, false, true> // no vec + unrolling
|
||||
// no vectorization
|
||||
template <typename Derived, typename OtherDerived, bool Unroll>
|
||||
struct ei_assignment_impl<Derived, OtherDerived, false, Unroll>
|
||||
{
|
||||
static void run(Derived & dst, const OtherDerived & src)
|
||||
{
|
||||
ei_matrix_assignment_unroller
|
||||
<Derived, OtherDerived, int(Derived::SizeAtCompileTime)
|
||||
<Derived, OtherDerived,
|
||||
Unroll ? int(Derived::SizeAtCompileTime)
|
||||
: Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic ? -1 // col-major
|
||||
: -2 // row-major
|
||||
>::run(dst.derived(), src.derived());
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
struct ei_assignment_impl<Derived, OtherDerived, false, false> // no vec + no unrolling + col major order
|
||||
{
|
||||
static void run(Derived & dst, const OtherDerived & src)
|
||||
{
|
||||
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
|
||||
{
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
else
|
||||
{
|
||||
// traverse in row-major order
|
||||
// in order to allow the compiler to unroll the inner loop
|
||||
for(int i = 0; i < dst.rows(); i++)
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//----
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
@ -224,7 +228,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true, false> // vec + no-unroll
|
||||
};
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array
|
||||
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array like
|
||||
{
|
||||
static void run(Derived & dst, const OtherDerived & src)
|
||||
{
|
||||
|
@ -26,17 +26,17 @@
|
||||
#define EIGEN_DOT_H
|
||||
|
||||
template<int Index, int Size, typename Derived1, typename Derived2>
|
||||
struct ei_dot_unroller
|
||||
struct ei_dot_impl
|
||||
{
|
||||
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
|
||||
{
|
||||
ei_dot_unroller<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot);
|
||||
ei_dot_impl<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot);
|
||||
dot += v1.coeff(Index) * ei_conj(v2.coeff(Index));
|
||||
}
|
||||
};
|
||||
|
||||
template<int Size, typename Derived1, typename Derived2>
|
||||
struct ei_dot_unroller<0, Size, Derived1, Derived2>
|
||||
struct ei_dot_impl<0, Size, Derived1, Derived2>
|
||||
{
|
||||
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
|
||||
{
|
||||
@ -44,15 +44,20 @@ struct ei_dot_unroller<0, Size, Derived1, Derived2>
|
||||
}
|
||||
};
|
||||
|
||||
template<int Index, typename Derived1, typename Derived2>
|
||||
struct ei_dot_unroller<Index, Dynamic, Derived1, Derived2>
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_dot_impl<Dynamic, Dynamic, Derived1, Derived2>
|
||||
{
|
||||
inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {}
|
||||
inline static void run(const Derived1& v1, const Derived2& v2, typename Derived1::Scalar& dot)
|
||||
{
|
||||
dot = v1.coeff(0) * ei_conj(v2.coeff(0));
|
||||
for(int i = 1; i < v1.size(); i++)
|
||||
dot += v1.coeff(i)* ei_conj(v2.coeff(i));
|
||||
}
|
||||
};
|
||||
|
||||
// prevent buggy user code from causing an infinite recursion
|
||||
template<int Index, typename Derived1, typename Derived2>
|
||||
struct ei_dot_unroller<Index, 0, Derived1, Derived2>
|
||||
struct ei_dot_impl<Index, 0, Derived1, Derived2>
|
||||
{
|
||||
inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {}
|
||||
};
|
||||
@ -83,22 +88,16 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested);
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested);
|
||||
ei_assert(nested.size() == otherNested.size());
|
||||
Scalar res;
|
||||
const bool unroll = SizeAtCompileTime
|
||||
* (_Nested::CoeffReadCost + _OtherNested::CoeffReadCost + NumTraits<Scalar>::MulCost)
|
||||
+ (int(SizeAtCompileTime) - 1) * NumTraits<Scalar>::AddCost
|
||||
<= EIGEN_UNROLLING_LIMIT;
|
||||
if(unroll)
|
||||
ei_dot_unroller<int(SizeAtCompileTime)-1,
|
||||
|
||||
Scalar res;
|
||||
ei_dot_impl<unroll ? int(SizeAtCompileTime)-1 : Dynamic,
|
||||
unroll ? int(SizeAtCompileTime) : Dynamic,
|
||||
_Nested, _OtherNested>
|
||||
::run(nested, otherNested, res);
|
||||
else
|
||||
{
|
||||
res = nested.coeff(0) * ei_conj(otherNested.coeff(0));
|
||||
for(int i = 1; i < size(); i++)
|
||||
res += nested.coeff(i)* ei_conj(otherNested.coeff(i));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -92,7 +92,7 @@ inline void Part<MatrixType, Mode>::operator=(const Other& other)
|
||||
}
|
||||
|
||||
template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount>
|
||||
struct ei_part_assignment_unroller
|
||||
struct ei_part_assignment_impl
|
||||
{
|
||||
enum {
|
||||
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
|
||||
@ -101,7 +101,7 @@ struct ei_part_assignment_unroller
|
||||
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
ei_part_assignment_unroller<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src);
|
||||
ei_part_assignment_impl<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src);
|
||||
|
||||
if(Mode == SelfAdjoint)
|
||||
{
|
||||
@ -122,7 +122,7 @@ struct ei_part_assignment_unroller
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, unsigned int Mode>
|
||||
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1>
|
||||
struct ei_part_assignment_impl<Derived1, Derived2, Mode, 1>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
@ -133,17 +133,66 @@ struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1>
|
||||
|
||||
// prevent buggy user code from causing an infinite recursion
|
||||
template<typename Derived1, typename Derived2, unsigned int Mode>
|
||||
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 0>
|
||||
struct ei_part_assignment_impl<Derived1, Derived2, Mode, 0>
|
||||
{
|
||||
inline static void run(Derived1 &, const Derived2 &) {}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2, unsigned int Mode>
|
||||
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, Dynamic>
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_part_assignment_impl<Derived1, Derived2, Upper, Dynamic>
|
||||
{
|
||||
inline static void run(Derived1 &, const Derived2 &) {}
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i <= j; i++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_part_assignment_impl<Derived1, Derived2, Lower, Dynamic>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = j; i < dst.rows(); i++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_part_assignment_impl<Derived1, Derived2, StrictlyUpper, Dynamic>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = 0; i < j; i++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
};
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_part_assignment_impl<Derived1, Derived2, StrictlyLower, Dynamic>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
for(int i = j+1; i < dst.rows(); i++)
|
||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||
}
|
||||
};
|
||||
template<typename Derived1, typename Derived2>
|
||||
struct ei_part_assignment_impl<Derived1, Derived2, SelfAdjoint, Dynamic>
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
for(int j = 0; j < dst.cols(); j++)
|
||||
{
|
||||
for(int i = 0; i < j; i++)
|
||||
dst.coeffRef(j, i) = ei_conj(dst.coeffRef(i, j) = src.coeff(i, j));
|
||||
dst.coeffRef(j, j) = ei_real(src.coeff(j, j));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<typename Other>
|
||||
@ -151,48 +200,12 @@ void Part<MatrixType, Mode>::lazyAssign(const Other& other)
|
||||
{
|
||||
const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT;
|
||||
ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
|
||||
if(unroll)
|
||||
{
|
||||
ei_part_assignment_unroller
|
||||
|
||||
ei_part_assignment_impl
|
||||
<MatrixType, Other, Mode,
|
||||
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic
|
||||
>::run(m_matrix, other.derived());
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(Mode)
|
||||
{
|
||||
case Upper:
|
||||
for(int j = 0; j < m_matrix.cols(); j++)
|
||||
for(int i = 0; i <= j; i++)
|
||||
m_matrix.coeffRef(i, j) = other.coeff(i, j);
|
||||
break;
|
||||
case Lower:
|
||||
for(int j = 0; j < m_matrix.cols(); j++)
|
||||
for(int i = j; i < m_matrix.rows(); i++)
|
||||
m_matrix.coeffRef(i, j) = other.coeff(i, j);
|
||||
break;
|
||||
case StrictlyUpper:
|
||||
for(int j = 0; j < m_matrix.cols(); j++)
|
||||
for(int i = 0; i < j; i++)
|
||||
m_matrix.coeffRef(i, j) = other.coeff(i, j);
|
||||
break;
|
||||
case StrictlyLower:
|
||||
for(int j = 0; j < m_matrix.cols(); j++)
|
||||
for(int i = j+1; i < m_matrix.rows(); i++)
|
||||
m_matrix.coeffRef(i, j) = other.coeff(i, j);
|
||||
break;
|
||||
case SelfAdjoint:
|
||||
for(int j = 0; j < m_matrix.cols(); j++)
|
||||
{
|
||||
for(int i = 0; i < j; i++)
|
||||
m_matrix.coeffRef(j, i) = ei_conj(m_matrix.coeffRef(i, j) = other.coeff(i, j));
|
||||
m_matrix.coeffRef(j, j) = ei_real(other.coeff(j, j));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<typename Other> inline void Part<MatrixType, Mode>::operator+=(const Other& other)
|
||||
|
@ -47,8 +47,8 @@ struct ei_product_impl<0, Size, Lhs, Rhs>
|
||||
}
|
||||
};
|
||||
|
||||
template<int Index, typename Lhs, typename Rhs>
|
||||
struct ei_product_impl<Index, Dynamic, Lhs, Rhs>
|
||||
template<typename Lhs, typename Rhs>
|
||||
struct ei_product_impl<Dynamic, Dynamic, Lhs, Rhs>
|
||||
{
|
||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res)
|
||||
{
|
||||
@ -268,7 +268,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
{
|
||||
Scalar res;
|
||||
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
||||
ei_product_impl<Lhs::ColsAtCompileTime-1,
|
||||
ei_product_impl<unroll ? Lhs::ColsAtCompileTime-1 : Dynamic,
|
||||
unroll ? Lhs::ColsAtCompileTime : Dynamic,
|
||||
_LhsNested, _RhsNested>
|
||||
::run(row, col, m_lhs, m_rhs, res);
|
||||
|
@ -63,7 +63,17 @@ template<typename BinaryOp, typename Derived, int Start>
|
||||
struct ei_redux_unroller<BinaryOp, Derived, Start, Dynamic>
|
||||
{
|
||||
typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
|
||||
static Scalar run(const Derived&, const BinaryOp&) { return Scalar(); }
|
||||
static Scalar run(const Derived& mat, const BinaryOp& func)
|
||||
{
|
||||
Scalar res;
|
||||
res = mat.coeff(0,0);
|
||||
for(int i = 1; i < mat.rows(); i++)
|
||||
res = func(res, mat.coeff(i, 0));
|
||||
for(int j = 1; j < mat.cols(); j++)
|
||||
for(int i = 0; i < mat.rows(); i++)
|
||||
res = func(res, mat.coeff(i, j));
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
|
||||
@ -81,21 +91,9 @@ MatrixBase<Derived>::redux(const BinaryOp& func) const
|
||||
const bool unroll = SizeAtCompileTime * CoeffReadCost
|
||||
+ (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost
|
||||
<= EIGEN_UNROLLING_LIMIT;
|
||||
if(unroll)
|
||||
return ei_redux_unroller<BinaryOp, Derived, 0,
|
||||
unroll ? int(SizeAtCompileTime) : Dynamic>
|
||||
::run(derived(), func);
|
||||
else
|
||||
{
|
||||
Scalar res;
|
||||
res = coeff(0,0);
|
||||
for(int i = 1; i < rows(); i++)
|
||||
res = func(res, coeff(i, 0));
|
||||
for(int j = 1; j < cols(); j++)
|
||||
for(int i = 0; i < rows(); i++)
|
||||
res = func(res, coeff(i, j));
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
/** \returns the sum of all coefficients of *this
|
||||
|
@ -52,7 +52,15 @@ struct ei_visitor_unroller<Visitor, Derived, 1>
|
||||
template<typename Visitor, typename Derived>
|
||||
struct ei_visitor_unroller<Visitor, Derived, Dynamic>
|
||||
{
|
||||
inline static void run(const Derived &, Visitor&) {}
|
||||
inline static void run(const Derived& mat, Visitor& visitor)
|
||||
{
|
||||
visitor.init(mat.coeff(0,0), 0, 0);
|
||||
for(int i = 1; i < mat.rows(); i++)
|
||||
visitor(mat.coeff(i, 0), i, 0);
|
||||
for(int j = 1; j < mat.cols(); j++)
|
||||
for(int i = 0; i < mat.rows(); i++)
|
||||
visitor(mat.coeff(i, j), i, j);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -77,19 +85,9 @@ void MatrixBase<Derived>::visit(Visitor& visitor) const
|
||||
const bool unroll = SizeAtCompileTime * CoeffReadCost
|
||||
+ (SizeAtCompileTime-1) * ei_functor_traits<Visitor>::Cost
|
||||
<= EIGEN_UNROLLING_LIMIT;
|
||||
if(unroll)
|
||||
return ei_visitor_unroller<Visitor, Derived,
|
||||
unroll ? int(SizeAtCompileTime) : Dynamic
|
||||
>::run(derived(), visitor);
|
||||
else
|
||||
{
|
||||
visitor.init(coeff(0,0), 0, 0);
|
||||
for(int i = 1; i < rows(); i++)
|
||||
visitor(coeff(i, 0), i, 0);
|
||||
for(int j = 1; j < cols(); j++)
|
||||
for(int i = 0; i < rows(); i++)
|
||||
visitor(coeff(i, j), i, j);
|
||||
}
|
||||
}
|
||||
|
||||
/** \internal
|
||||
|
@ -34,7 +34,7 @@
|
||||
|
||||
/** Defines the maximal loop size to enable meta unrolling of loops */
|
||||
#ifndef EIGEN_UNROLLING_LIMIT
|
||||
#define EIGEN_UNROLLING_LIMIT 400
|
||||
#define EIGEN_UNROLLING_LIMIT 100
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
|
Loading…
x
Reference in New Issue
Block a user