mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-18 12:23:13 +08:00
* move some compile-time "if"s to their respective unrollers (assign and dot)
* fix a couple of compilation issues when unrolling is disabled
* reduce default unrolling limit to a more reasonable value
This commit is contained in:
parent
a172385720
commit
6998037930
@ -58,10 +58,30 @@ struct ei_matrix_assignment_unroller<Derived1, Derived2, 0>
|
|||||||
inline static void run(Derived1 &, const Derived2 &) {}
|
inline static void run(Derived1 &, const Derived2 &) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Dynamic col-major
|
||||||
template<typename Derived1, typename Derived2>
|
template<typename Derived1, typename Derived2>
|
||||||
struct ei_matrix_assignment_unroller<Derived1, Derived2, Dynamic>
|
struct ei_matrix_assignment_unroller<Derived1, Derived2, -1>
|
||||||
{
|
{
|
||||||
inline static void run(Derived1 &, const Derived2 &) {}
|
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||||
|
{
|
||||||
|
for(int j = 0; j < dst.cols(); j++)
|
||||||
|
for(int i = 0; i < dst.rows(); i++)
|
||||||
|
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Dynamic row-major
|
||||||
|
template<typename Derived1, typename Derived2>
|
||||||
|
struct ei_matrix_assignment_unroller<Derived1, Derived2, -2>
|
||||||
|
{
|
||||||
|
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||||
|
{
|
||||||
|
// traverse in row-major order
|
||||||
|
// in order to allow the compiler to unroll the inner loop
|
||||||
|
for(int i = 0; i < dst.rows(); i++)
|
||||||
|
for(int j = 0; j < dst.cols(); j++)
|
||||||
|
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
//----
|
//----
|
||||||
@ -104,9 +124,11 @@ template <typename Derived, typename OtherDerived,
|
|||||||
bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit)
|
bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit)
|
||||||
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit))
|
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit))
|
||||||
&& ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit)
|
&& ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit)
|
||||||
||((int(Derived::Flags)&RowMajorBit)
|
|| ((int(Derived::Flags) & RowMajorBit)
|
||||||
? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
|
? int(Derived::ColsAtCompileTime)!=Dynamic
|
||||||
: int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
|
&& (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
|
||||||
|
: int(Derived::RowsAtCompileTime)!=Dynamic
|
||||||
|
&& (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
|
||||||
bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT>
|
bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT>
|
||||||
struct ei_assignment_impl;
|
struct ei_assignment_impl;
|
||||||
|
|
||||||
@ -156,39 +178,21 @@ inline Derived& MatrixBase<Derived>
|
|||||||
|
|
||||||
//----
|
//----
|
||||||
|
|
||||||
template <typename Derived, typename OtherDerived>
|
// no vectorization
|
||||||
struct ei_assignment_impl<Derived, OtherDerived, false, true> // no vec + unrolling
|
template <typename Derived, typename OtherDerived, bool Unroll>
|
||||||
|
struct ei_assignment_impl<Derived, OtherDerived, false, Unroll>
|
||||||
{
|
{
|
||||||
static void run(Derived & dst, const OtherDerived & src)
|
static void run(Derived & dst, const OtherDerived & src)
|
||||||
{
|
{
|
||||||
ei_matrix_assignment_unroller
|
ei_matrix_assignment_unroller
|
||||||
<Derived, OtherDerived, int(Derived::SizeAtCompileTime)
|
<Derived, OtherDerived,
|
||||||
|
Unroll ? int(Derived::SizeAtCompileTime)
|
||||||
|
: Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic ? -1 // col-major
|
||||||
|
: -2 // row-major
|
||||||
>::run(dst.derived(), src.derived());
|
>::run(dst.derived(), src.derived());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Derived, typename OtherDerived>
|
|
||||||
struct ei_assignment_impl<Derived, OtherDerived, false, false> // no vec + no unrolling + col major order
|
|
||||||
{
|
|
||||||
static void run(Derived & dst, const OtherDerived & src)
|
|
||||||
{
|
|
||||||
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
|
|
||||||
{
|
|
||||||
for(int j = 0; j < dst.cols(); j++)
|
|
||||||
for(int i = 0; i < dst.rows(); i++)
|
|
||||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// traverse in row-major order
|
|
||||||
// in order to allow the compiler to unroll the inner loop
|
|
||||||
for(int i = 0; i < dst.rows(); i++)
|
|
||||||
for(int j = 0; j < dst.cols(); j++)
|
|
||||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//----
|
//----
|
||||||
|
|
||||||
template <typename Derived, typename OtherDerived>
|
template <typename Derived, typename OtherDerived>
|
||||||
@ -224,7 +228,7 @@ struct ei_assignment_impl<Derived, OtherDerived, true, false> // vec + no-unroll
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename Derived, typename OtherDerived>
|
template <typename Derived, typename OtherDerived>
|
||||||
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array
|
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array like
|
||||||
{
|
{
|
||||||
static void run(Derived & dst, const OtherDerived & src)
|
static void run(Derived & dst, const OtherDerived & src)
|
||||||
{
|
{
|
||||||
|
@ -26,17 +26,17 @@
|
|||||||
#define EIGEN_DOT_H
|
#define EIGEN_DOT_H
|
||||||
|
|
||||||
template<int Index, int Size, typename Derived1, typename Derived2>
|
template<int Index, int Size, typename Derived1, typename Derived2>
|
||||||
struct ei_dot_unroller
|
struct ei_dot_impl
|
||||||
{
|
{
|
||||||
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
|
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
|
||||||
{
|
{
|
||||||
ei_dot_unroller<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot);
|
ei_dot_impl<Index-1, Size, Derived1, Derived2>::run(v1, v2, dot);
|
||||||
dot += v1.coeff(Index) * ei_conj(v2.coeff(Index));
|
dot += v1.coeff(Index) * ei_conj(v2.coeff(Index));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Size, typename Derived1, typename Derived2>
|
template<int Size, typename Derived1, typename Derived2>
|
||||||
struct ei_dot_unroller<0, Size, Derived1, Derived2>
|
struct ei_dot_impl<0, Size, Derived1, Derived2>
|
||||||
{
|
{
|
||||||
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
|
inline static void run(const Derived1 &v1, const Derived2& v2, typename Derived1::Scalar &dot)
|
||||||
{
|
{
|
||||||
@ -44,15 +44,20 @@ struct ei_dot_unroller<0, Size, Derived1, Derived2>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Index, typename Derived1, typename Derived2>
|
template<typename Derived1, typename Derived2>
|
||||||
struct ei_dot_unroller<Index, Dynamic, Derived1, Derived2>
|
struct ei_dot_impl<Dynamic, Dynamic, Derived1, Derived2>
|
||||||
{
|
{
|
||||||
inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {}
|
inline static void run(const Derived1& v1, const Derived2& v2, typename Derived1::Scalar& dot)
|
||||||
|
{
|
||||||
|
dot = v1.coeff(0) * ei_conj(v2.coeff(0));
|
||||||
|
for(int i = 1; i < v1.size(); i++)
|
||||||
|
dot += v1.coeff(i)* ei_conj(v2.coeff(i));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// prevent buggy user code from causing an infinite recursion
|
// prevent buggy user code from causing an infinite recursion
|
||||||
template<int Index, typename Derived1, typename Derived2>
|
template<int Index, typename Derived1, typename Derived2>
|
||||||
struct ei_dot_unroller<Index, 0, Derived1, Derived2>
|
struct ei_dot_impl<Index, 0, Derived1, Derived2>
|
||||||
{
|
{
|
||||||
inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {}
|
inline static void run(const Derived1&, const Derived2&, typename Derived1::Scalar&) {}
|
||||||
};
|
};
|
||||||
@ -83,22 +88,16 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
|||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested);
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested);
|
||||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested);
|
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested);
|
||||||
ei_assert(nested.size() == otherNested.size());
|
ei_assert(nested.size() == otherNested.size());
|
||||||
Scalar res;
|
|
||||||
const bool unroll = SizeAtCompileTime
|
const bool unroll = SizeAtCompileTime
|
||||||
* (_Nested::CoeffReadCost + _OtherNested::CoeffReadCost + NumTraits<Scalar>::MulCost)
|
* (_Nested::CoeffReadCost + _OtherNested::CoeffReadCost + NumTraits<Scalar>::MulCost)
|
||||||
+ (int(SizeAtCompileTime) - 1) * NumTraits<Scalar>::AddCost
|
+ (int(SizeAtCompileTime) - 1) * NumTraits<Scalar>::AddCost
|
||||||
<= EIGEN_UNROLLING_LIMIT;
|
<= EIGEN_UNROLLING_LIMIT;
|
||||||
if(unroll)
|
|
||||||
ei_dot_unroller<int(SizeAtCompileTime)-1,
|
Scalar res;
|
||||||
|
ei_dot_impl<unroll ? int(SizeAtCompileTime)-1 : Dynamic,
|
||||||
unroll ? int(SizeAtCompileTime) : Dynamic,
|
unroll ? int(SizeAtCompileTime) : Dynamic,
|
||||||
_Nested, _OtherNested>
|
_Nested, _OtherNested>
|
||||||
::run(nested, otherNested, res);
|
::run(nested, otherNested, res);
|
||||||
else
|
|
||||||
{
|
|
||||||
res = nested.coeff(0) * ei_conj(otherNested.coeff(0));
|
|
||||||
for(int i = 1; i < size(); i++)
|
|
||||||
res += nested.coeff(i)* ei_conj(otherNested.coeff(i));
|
|
||||||
}
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,7 +92,7 @@ inline void Part<MatrixType, Mode>::operator=(const Other& other)
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount>
|
template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount>
|
||||||
struct ei_part_assignment_unroller
|
struct ei_part_assignment_impl
|
||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
|
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
|
||||||
@ -101,7 +101,7 @@ struct ei_part_assignment_unroller
|
|||||||
|
|
||||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||||
{
|
{
|
||||||
ei_part_assignment_unroller<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src);
|
ei_part_assignment_impl<Derived1, Derived2, Mode, UnrollCount-1>::run(dst, src);
|
||||||
|
|
||||||
if(Mode == SelfAdjoint)
|
if(Mode == SelfAdjoint)
|
||||||
{
|
{
|
||||||
@ -122,7 +122,7 @@ struct ei_part_assignment_unroller
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<typename Derived1, typename Derived2, unsigned int Mode>
|
template<typename Derived1, typename Derived2, unsigned int Mode>
|
||||||
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1>
|
struct ei_part_assignment_impl<Derived1, Derived2, Mode, 1>
|
||||||
{
|
{
|
||||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||||
{
|
{
|
||||||
@ -133,17 +133,66 @@ struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 1>
|
|||||||
|
|
||||||
// prevent buggy user code from causing an infinite recursion
|
// prevent buggy user code from causing an infinite recursion
|
||||||
template<typename Derived1, typename Derived2, unsigned int Mode>
|
template<typename Derived1, typename Derived2, unsigned int Mode>
|
||||||
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, 0>
|
struct ei_part_assignment_impl<Derived1, Derived2, Mode, 0>
|
||||||
{
|
{
|
||||||
inline static void run(Derived1 &, const Derived2 &) {}
|
inline static void run(Derived1 &, const Derived2 &) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Derived1, typename Derived2, unsigned int Mode>
|
template<typename Derived1, typename Derived2>
|
||||||
struct ei_part_assignment_unroller<Derived1, Derived2, Mode, Dynamic>
|
struct ei_part_assignment_impl<Derived1, Derived2, Upper, Dynamic>
|
||||||
{
|
{
|
||||||
inline static void run(Derived1 &, const Derived2 &) {}
|
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||||
|
{
|
||||||
|
for(int j = 0; j < dst.cols(); j++)
|
||||||
|
for(int i = 0; i <= j; i++)
|
||||||
|
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename Derived1, typename Derived2>
|
||||||
|
struct ei_part_assignment_impl<Derived1, Derived2, Lower, Dynamic>
|
||||||
|
{
|
||||||
|
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||||
|
{
|
||||||
|
for(int j = 0; j < dst.cols(); j++)
|
||||||
|
for(int i = j; i < dst.rows(); i++)
|
||||||
|
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Derived1, typename Derived2>
|
||||||
|
struct ei_part_assignment_impl<Derived1, Derived2, StrictlyUpper, Dynamic>
|
||||||
|
{
|
||||||
|
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||||
|
{
|
||||||
|
for(int j = 0; j < dst.cols(); j++)
|
||||||
|
for(int i = 0; i < j; i++)
|
||||||
|
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<typename Derived1, typename Derived2>
|
||||||
|
struct ei_part_assignment_impl<Derived1, Derived2, StrictlyLower, Dynamic>
|
||||||
|
{
|
||||||
|
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||||
|
{
|
||||||
|
for(int j = 0; j < dst.cols(); j++)
|
||||||
|
for(int i = j+1; i < dst.rows(); i++)
|
||||||
|
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<typename Derived1, typename Derived2>
|
||||||
|
struct ei_part_assignment_impl<Derived1, Derived2, SelfAdjoint, Dynamic>
|
||||||
|
{
|
||||||
|
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||||
|
{
|
||||||
|
for(int j = 0; j < dst.cols(); j++)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < j; i++)
|
||||||
|
dst.coeffRef(j, i) = ei_conj(dst.coeffRef(i, j) = src.coeff(i, j));
|
||||||
|
dst.coeffRef(j, j) = ei_real(src.coeff(j, j));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template<typename MatrixType, unsigned int Mode>
|
template<typename MatrixType, unsigned int Mode>
|
||||||
template<typename Other>
|
template<typename Other>
|
||||||
@ -151,47 +200,11 @@ void Part<MatrixType, Mode>::lazyAssign(const Other& other)
|
|||||||
{
|
{
|
||||||
const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT;
|
const bool unroll = MatrixType::SizeAtCompileTime * Other::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT;
|
||||||
ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
|
ei_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
|
||||||
if(unroll)
|
|
||||||
{
|
ei_part_assignment_impl
|
||||||
ei_part_assignment_unroller
|
|
||||||
<MatrixType, Other, Mode,
|
<MatrixType, Other, Mode,
|
||||||
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic
|
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic
|
||||||
>::run(m_matrix, other.derived());
|
>::run(m_matrix, other.derived());
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
switch(Mode)
|
|
||||||
{
|
|
||||||
case Upper:
|
|
||||||
for(int j = 0; j < m_matrix.cols(); j++)
|
|
||||||
for(int i = 0; i <= j; i++)
|
|
||||||
m_matrix.coeffRef(i, j) = other.coeff(i, j);
|
|
||||||
break;
|
|
||||||
case Lower:
|
|
||||||
for(int j = 0; j < m_matrix.cols(); j++)
|
|
||||||
for(int i = j; i < m_matrix.rows(); i++)
|
|
||||||
m_matrix.coeffRef(i, j) = other.coeff(i, j);
|
|
||||||
break;
|
|
||||||
case StrictlyUpper:
|
|
||||||
for(int j = 0; j < m_matrix.cols(); j++)
|
|
||||||
for(int i = 0; i < j; i++)
|
|
||||||
m_matrix.coeffRef(i, j) = other.coeff(i, j);
|
|
||||||
break;
|
|
||||||
case StrictlyLower:
|
|
||||||
for(int j = 0; j < m_matrix.cols(); j++)
|
|
||||||
for(int i = j+1; i < m_matrix.rows(); i++)
|
|
||||||
m_matrix.coeffRef(i, j) = other.coeff(i, j);
|
|
||||||
break;
|
|
||||||
case SelfAdjoint:
|
|
||||||
for(int j = 0; j < m_matrix.cols(); j++)
|
|
||||||
{
|
|
||||||
for(int i = 0; i < j; i++)
|
|
||||||
m_matrix.coeffRef(j, i) = ei_conj(m_matrix.coeffRef(i, j) = other.coeff(i, j));
|
|
||||||
m_matrix.coeffRef(j, j) = ei_real(other.coeff(j, j));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename MatrixType, unsigned int Mode>
|
template<typename MatrixType, unsigned int Mode>
|
||||||
|
@ -47,8 +47,8 @@ struct ei_product_impl<0, Size, Lhs, Rhs>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Index, typename Lhs, typename Rhs>
|
template<typename Lhs, typename Rhs>
|
||||||
struct ei_product_impl<Index, Dynamic, Lhs, Rhs>
|
struct ei_product_impl<Dynamic, Dynamic, Lhs, Rhs>
|
||||||
{
|
{
|
||||||
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res)
|
inline static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar& res)
|
||||||
{
|
{
|
||||||
@ -268,7 +268,7 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
|||||||
{
|
{
|
||||||
Scalar res;
|
Scalar res;
|
||||||
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
||||||
ei_product_impl<Lhs::ColsAtCompileTime-1,
|
ei_product_impl<unroll ? Lhs::ColsAtCompileTime-1 : Dynamic,
|
||||||
unroll ? Lhs::ColsAtCompileTime : Dynamic,
|
unroll ? Lhs::ColsAtCompileTime : Dynamic,
|
||||||
_LhsNested, _RhsNested>
|
_LhsNested, _RhsNested>
|
||||||
::run(row, col, m_lhs, m_rhs, res);
|
::run(row, col, m_lhs, m_rhs, res);
|
||||||
|
@ -63,7 +63,17 @@ template<typename BinaryOp, typename Derived, int Start>
|
|||||||
struct ei_redux_unroller<BinaryOp, Derived, Start, Dynamic>
|
struct ei_redux_unroller<BinaryOp, Derived, Start, Dynamic>
|
||||||
{
|
{
|
||||||
typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
|
typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
|
||||||
static Scalar run(const Derived&, const BinaryOp&) { return Scalar(); }
|
static Scalar run(const Derived& mat, const BinaryOp& func)
|
||||||
|
{
|
||||||
|
Scalar res;
|
||||||
|
res = mat.coeff(0,0);
|
||||||
|
for(int i = 1; i < mat.rows(); i++)
|
||||||
|
res = func(res, mat.coeff(i, 0));
|
||||||
|
for(int j = 1; j < mat.cols(); j++)
|
||||||
|
for(int i = 0; i < mat.rows(); i++)
|
||||||
|
res = func(res, mat.coeff(i, j));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
|
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
|
||||||
@ -81,21 +91,9 @@ MatrixBase<Derived>::redux(const BinaryOp& func) const
|
|||||||
const bool unroll = SizeAtCompileTime * CoeffReadCost
|
const bool unroll = SizeAtCompileTime * CoeffReadCost
|
||||||
+ (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost
|
+ (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost
|
||||||
<= EIGEN_UNROLLING_LIMIT;
|
<= EIGEN_UNROLLING_LIMIT;
|
||||||
if(unroll)
|
|
||||||
return ei_redux_unroller<BinaryOp, Derived, 0,
|
return ei_redux_unroller<BinaryOp, Derived, 0,
|
||||||
unroll ? int(SizeAtCompileTime) : Dynamic>
|
unroll ? int(SizeAtCompileTime) : Dynamic>
|
||||||
::run(derived(), func);
|
::run(derived(), func);
|
||||||
else
|
|
||||||
{
|
|
||||||
Scalar res;
|
|
||||||
res = coeff(0,0);
|
|
||||||
for(int i = 1; i < rows(); i++)
|
|
||||||
res = func(res, coeff(i, 0));
|
|
||||||
for(int j = 1; j < cols(); j++)
|
|
||||||
for(int i = 0; i < rows(); i++)
|
|
||||||
res = func(res, coeff(i, j));
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the sum of all coefficients of *this
|
/** \returns the sum of all coefficients of *this
|
||||||
|
@ -52,7 +52,15 @@ struct ei_visitor_unroller<Visitor, Derived, 1>
|
|||||||
template<typename Visitor, typename Derived>
|
template<typename Visitor, typename Derived>
|
||||||
struct ei_visitor_unroller<Visitor, Derived, Dynamic>
|
struct ei_visitor_unroller<Visitor, Derived, Dynamic>
|
||||||
{
|
{
|
||||||
inline static void run(const Derived &, Visitor&) {}
|
inline static void run(const Derived& mat, Visitor& visitor)
|
||||||
|
{
|
||||||
|
visitor.init(mat.coeff(0,0), 0, 0);
|
||||||
|
for(int i = 1; i < mat.rows(); i++)
|
||||||
|
visitor(mat.coeff(i, 0), i, 0);
|
||||||
|
for(int j = 1; j < mat.cols(); j++)
|
||||||
|
for(int i = 0; i < mat.rows(); i++)
|
||||||
|
visitor(mat.coeff(i, j), i, j);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -77,19 +85,9 @@ void MatrixBase<Derived>::visit(Visitor& visitor) const
|
|||||||
const bool unroll = SizeAtCompileTime * CoeffReadCost
|
const bool unroll = SizeAtCompileTime * CoeffReadCost
|
||||||
+ (SizeAtCompileTime-1) * ei_functor_traits<Visitor>::Cost
|
+ (SizeAtCompileTime-1) * ei_functor_traits<Visitor>::Cost
|
||||||
<= EIGEN_UNROLLING_LIMIT;
|
<= EIGEN_UNROLLING_LIMIT;
|
||||||
if(unroll)
|
|
||||||
return ei_visitor_unroller<Visitor, Derived,
|
return ei_visitor_unroller<Visitor, Derived,
|
||||||
unroll ? int(SizeAtCompileTime) : Dynamic
|
unroll ? int(SizeAtCompileTime) : Dynamic
|
||||||
>::run(derived(), visitor);
|
>::run(derived(), visitor);
|
||||||
else
|
|
||||||
{
|
|
||||||
visitor.init(coeff(0,0), 0, 0);
|
|
||||||
for(int i = 1; i < rows(); i++)
|
|
||||||
visitor(coeff(i, 0), i, 0);
|
|
||||||
for(int j = 1; j < cols(); j++)
|
|
||||||
for(int i = 0; i < rows(); i++)
|
|
||||||
visitor(coeff(i, j), i, j);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
|
@ -34,7 +34,7 @@
|
|||||||
|
|
||||||
/** Defines the maximal loop size to enable meta unrolling of loops */
|
/** Defines the maximal loop size to enable meta unrolling of loops */
|
||||||
#ifndef EIGEN_UNROLLING_LIMIT
|
#ifndef EIGEN_UNROLLING_LIMIT
|
||||||
#define EIGEN_UNROLLING_LIMIT 400
|
#define EIGEN_UNROLLING_LIMIT 100
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||||
|
Loading…
x
Reference in New Issue
Block a user