mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-12 17:33:15 +08:00
* replace compile-time-if by meta-selector in Assign.h
as it speed up compilation. * fix minor typo introduced in the previous commit
This commit is contained in:
parent
e2ac5d244e
commit
f5e599e489
@ -42,4 +42,4 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // EIGEN_ARRAYBASE_H
|
#endif // EIGEN_ARRAYBASE_H
|
||||||
|
@ -98,13 +98,16 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic>
|
|||||||
{ ei_internal_assert(false && "ei_matrix_assignment_packet_unroller"); }
|
{ ei_internal_assert(false && "ei_matrix_assignment_packet_unroller"); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//----
|
||||||
|
|
||||||
template <typename Derived, typename OtherDerived,
|
template <typename Derived, typename OtherDerived,
|
||||||
bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit)
|
bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit)
|
||||||
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit))
|
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit))
|
||||||
&& ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit)
|
&& ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit)
|
||||||
||((int(Derived::Flags)&RowMajorBit)
|
||((int(Derived::Flags)&RowMajorBit)
|
||||||
? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
|
? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
|
||||||
: int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) )>
|
: int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
|
||||||
|
bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT>
|
||||||
struct ei_assignment_impl;
|
struct ei_assignment_impl;
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
@ -112,141 +115,176 @@ template<typename OtherDerived>
|
|||||||
inline Derived& MatrixBase<Derived>
|
inline Derived& MatrixBase<Derived>
|
||||||
::lazyAssign(const MatrixBase<OtherDerived>& other)
|
::lazyAssign(const MatrixBase<OtherDerived>& other)
|
||||||
{
|
{
|
||||||
ei_assignment_impl<Derived, OtherDerived>::execute(derived(),other.derived());
|
ei_assert(rows() == other.rows() && cols() == other.cols());
|
||||||
|
ei_assignment_impl<Derived, OtherDerived>::run(derived(),other.derived());
|
||||||
return derived();
|
return derived();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename Derived, typename OtherDerived,
|
||||||
|
bool EvalBeforeAssigning = (OtherDerived::Flags & EvalBeforeAssigningBit),
|
||||||
|
bool NeedToTranspose = Derived::IsVectorAtCompileTime
|
||||||
|
&& OtherDerived::IsVectorAtCompileTime
|
||||||
|
&& (int)Derived::RowsAtCompileTime != (int)OtherDerived::RowsAtCompileTime
|
||||||
|
&& (int)Derived::ColsAtCompileTime != (int)OtherDerived::ColsAtCompileTime>
|
||||||
|
struct ei_assign_selector;
|
||||||
|
|
||||||
|
template<typename Derived, typename OtherDerived>
|
||||||
|
struct ei_assign_selector<Derived,OtherDerived,true,true> {
|
||||||
|
static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
|
||||||
|
};
|
||||||
|
template<typename Derived, typename OtherDerived>
|
||||||
|
struct ei_assign_selector<Derived,OtherDerived,true,false> {
|
||||||
|
static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
|
||||||
|
};
|
||||||
|
template<typename Derived, typename OtherDerived>
|
||||||
|
struct ei_assign_selector<Derived,OtherDerived,false,true> {
|
||||||
|
static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
|
||||||
|
};
|
||||||
|
template<typename Derived, typename OtherDerived>
|
||||||
|
struct ei_assign_selector<Derived,OtherDerived,false,false> {
|
||||||
|
static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
|
||||||
|
};
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename OtherDerived>
|
template<typename OtherDerived>
|
||||||
inline Derived& MatrixBase<Derived>
|
inline Derived& MatrixBase<Derived>
|
||||||
::operator=(const MatrixBase<OtherDerived>& other)
|
::operator=(const MatrixBase<OtherDerived>& other)
|
||||||
{
|
{
|
||||||
const bool need_to_transpose = Derived::IsVectorAtCompileTime
|
return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
|
||||||
&& OtherDerived::IsVectorAtCompileTime
|
|
||||||
&& (int)Derived::RowsAtCompileTime != (int)OtherDerived::RowsAtCompileTime
|
|
||||||
&& (int)Derived::ColsAtCompileTime != (int)OtherDerived::ColsAtCompileTime;
|
|
||||||
if(OtherDerived::Flags & EvalBeforeAssigningBit)
|
|
||||||
{
|
|
||||||
if(need_to_transpose)
|
|
||||||
return lazyAssign(other.transpose().eval());
|
|
||||||
else
|
|
||||||
return lazyAssign(other.eval());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if(need_to_transpose)
|
|
||||||
return lazyAssign(other.transpose());
|
|
||||||
else
|
|
||||||
return lazyAssign(other.derived());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//----
|
||||||
|
|
||||||
template <typename Derived, typename OtherDerived>
|
template <typename Derived, typename OtherDerived>
|
||||||
struct ei_assignment_impl<Derived, OtherDerived, false>
|
struct ei_assignment_impl<Derived, OtherDerived, false, true> // no vec + unrolling
|
||||||
{
|
{
|
||||||
static void execute(Derived & dst, const OtherDerived & src)
|
static void run(Derived & dst, const OtherDerived & src)
|
||||||
{
|
{
|
||||||
const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
ei_matrix_assignment_unroller
|
||||||
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
<Derived, OtherDerived, int(Derived::SizeAtCompileTime)
|
||||||
if(unroll)
|
>::run(dst.derived(), src.derived());
|
||||||
{
|
|
||||||
ei_matrix_assignment_unroller
|
|
||||||
<Derived, OtherDerived,
|
|
||||||
unroll ? int(Derived::SizeAtCompileTime) : Dynamic
|
|
||||||
>::run(dst.derived(), src.derived());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
|
|
||||||
{
|
|
||||||
for(int j = 0; j < dst.cols(); j++)
|
|
||||||
for(int i = 0; i < dst.rows(); i++)
|
|
||||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// traverse in row-major order
|
|
||||||
// in order to allow the compiler to unroll the inner loop
|
|
||||||
for(int i = 0; i < dst.rows(); i++)
|
|
||||||
for(int j = 0; j < dst.cols(); j++)
|
|
||||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Derived, typename OtherDerived>
|
template <typename Derived, typename OtherDerived>
|
||||||
struct ei_assignment_impl<Derived, OtherDerived, true>
|
struct ei_assignment_impl<Derived, OtherDerived, false, false> // no vec + no unrolling + col major order
|
||||||
{
|
{
|
||||||
static void execute(Derived & dst, const OtherDerived & src)
|
static void run(Derived & dst, const OtherDerived & src)
|
||||||
{
|
{
|
||||||
const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
|
||||||
if(unroll)
|
|
||||||
{
|
{
|
||||||
ei_matrix_assignment_packet_unroller
|
for(int j = 0; j < dst.cols(); j++)
|
||||||
<Derived, OtherDerived,
|
for(int i = 0; i < dst.rows(); i++)
|
||||||
unroll && int(Derived::SizeAtCompileTime)>=int(ei_packet_traits<typename Derived::Scalar>::size)
|
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||||
? int(Derived::SizeAtCompileTime)-int(ei_packet_traits<typename Derived::Scalar>::size)
|
|
||||||
: Dynamic>::run(dst.const_cast_derived(), src.derived());
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if(OtherDerived::Flags&RowMajorBit)
|
// traverse in row-major order
|
||||||
{
|
// in order to allow the compiler to unroll the inner loop
|
||||||
if ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
|
for(int i = 0; i < dst.rows(); i++)
|
||||||
&& (Derived::ColsAtCompileTime==Dynamic
|
for(int j = 0; j < dst.cols(); j++)
|
||||||
|| Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
|
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||||
{
|
|
||||||
const int size = dst.rows() * dst.cols();
|
|
||||||
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
|
|
||||||
int index = 0;
|
|
||||||
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
|
|
||||||
{
|
|
||||||
// FIXME the following is not really efficient
|
|
||||||
int i = index/dst.cols();
|
|
||||||
int j = index%dst.cols();
|
|
||||||
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
|
|
||||||
}
|
|
||||||
for(int i = alignedSize/dst.cols(); i < dst.rows(); i++)
|
|
||||||
for(int j = alignedSize%dst.cols(); j < dst.cols(); j++)
|
|
||||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for(int i = 0; i < dst.rows(); i++)
|
|
||||||
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
|
|
||||||
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if ((Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
|
|
||||||
&& ( Derived::RowsAtCompileTime==Dynamic
|
|
||||||
|| Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
|
|
||||||
{
|
|
||||||
const int size = dst.rows() * dst.cols();
|
|
||||||
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
|
|
||||||
int index = 0;
|
|
||||||
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
|
|
||||||
{
|
|
||||||
// FIXME the following is not really efficient
|
|
||||||
int i = index%dst.rows();
|
|
||||||
int j = index/dst.rows();
|
|
||||||
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
|
|
||||||
}
|
|
||||||
for(int j = alignedSize/dst.rows(); j < dst.cols(); j++)
|
|
||||||
for(int i = alignedSize%dst.rows(); i < dst.rows(); i++)
|
|
||||||
dst.coeffRef(i, j) = src.coeff(i, j);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for(int j = 0; j < dst.cols(); j++)
|
|
||||||
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
|
|
||||||
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//----
|
||||||
|
|
||||||
|
template <typename Derived, typename OtherDerived>
|
||||||
|
struct ei_assignment_impl<Derived, OtherDerived, true, true> // vec + unrolling
|
||||||
|
{
|
||||||
|
static void run(Derived & dst, const OtherDerived & src)
|
||||||
|
{
|
||||||
|
ei_matrix_assignment_packet_unroller
|
||||||
|
<Derived, OtherDerived,
|
||||||
|
int(Derived::SizeAtCompileTime)-int(ei_packet_traits<typename Derived::Scalar>::size)
|
||||||
|
>::run(dst.const_cast_derived(), src.derived());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Derived, typename OtherDerived,
|
||||||
|
bool RowMajor = OtherDerived::Flags&RowMajorBit,
|
||||||
|
bool Complex1DArray = RowMajor
|
||||||
|
? ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
|
||||||
|
&& ( Derived::ColsAtCompileTime==Dynamic
|
||||||
|
|| Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0) )
|
||||||
|
: ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
|
||||||
|
&& ( Derived::RowsAtCompileTime==Dynamic
|
||||||
|
|| Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))>
|
||||||
|
struct ei_packet_assignment_seclector;
|
||||||
|
|
||||||
|
template <typename Derived, typename OtherDerived>
|
||||||
|
struct ei_assignment_impl<Derived, OtherDerived, true, false> // vec + no-unrolling
|
||||||
|
{
|
||||||
|
static void run(Derived & dst, const OtherDerived & src)
|
||||||
|
{
|
||||||
|
ei_packet_assignment_seclector<Derived,OtherDerived>::run(dst,src);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Derived, typename OtherDerived>
|
||||||
|
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array
|
||||||
|
{
|
||||||
|
static void run(Derived & dst, const OtherDerived & src)
|
||||||
|
{
|
||||||
|
const int size = dst.rows() * dst.cols();
|
||||||
|
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)
|
||||||
|
* ei_packet_traits<typename Derived::Scalar>::size;
|
||||||
|
int index = 0;
|
||||||
|
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||||
|
{
|
||||||
|
// FIXME the following is not really efficient
|
||||||
|
int i = index/dst.cols();
|
||||||
|
int j = index%dst.cols();
|
||||||
|
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
|
||||||
|
}
|
||||||
|
for(int i = alignedSize/dst.cols(); i < dst.rows(); i++)
|
||||||
|
for(int j = alignedSize%dst.cols(); j < dst.cols(); j++)
|
||||||
|
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Derived, typename OtherDerived>
|
||||||
|
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, false> // row-major + normal
|
||||||
|
{
|
||||||
|
static void run(Derived & dst, const OtherDerived & src)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < dst.rows(); i++)
|
||||||
|
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||||
|
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Derived, typename OtherDerived>
|
||||||
|
struct ei_packet_assignment_seclector<Derived, OtherDerived, false, true> // col-major + complex 1D array like
|
||||||
|
{
|
||||||
|
static void run(Derived & dst, const OtherDerived & src)
|
||||||
|
{
|
||||||
|
const int size = dst.rows() * dst.cols();
|
||||||
|
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
|
||||||
|
int index = 0;
|
||||||
|
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||||
|
{
|
||||||
|
// FIXME the following is not really efficient
|
||||||
|
int i = index%dst.rows();
|
||||||
|
int j = index/dst.rows();
|
||||||
|
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
|
||||||
|
}
|
||||||
|
for(int j = alignedSize/dst.rows(); j < dst.cols(); j++)
|
||||||
|
for(int i = alignedSize%dst.rows(); i < dst.rows(); i++)
|
||||||
|
dst.coeffRef(i, j) = src.coeff(i, j);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Derived, typename OtherDerived>
|
||||||
|
struct ei_packet_assignment_seclector<Derived, OtherDerived, false, false> // col-major + normal
|
||||||
|
{
|
||||||
|
static void run(Derived & dst, const OtherDerived & src)
|
||||||
|
{
|
||||||
|
for(int j = 0; j < dst.cols(); j++)
|
||||||
|
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
|
||||||
|
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
#endif // EIGEN_ASSIGN_H
|
#endif // EIGEN_ASSIGN_H
|
||||||
|
@ -595,11 +595,6 @@ template<typename Derived> class MatrixBase : public ArrayBase<Derived>
|
|||||||
const QR<typename ei_eval<Derived>::type> qr() const;
|
const QR<typename ei_eval<Derived>::type> qr() const;
|
||||||
//@}
|
//@}
|
||||||
|
|
||||||
|
|
||||||
#ifdef EIGEN_MATRIX_CUSTOM_ADDONS_FILE
|
|
||||||
#include EIGEN_MATRIX_CUSTOM_ADDONS_FILE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // EIGEN_MATRIXBASE_H
|
#endif // EIGEN_MATRIXBASE_H
|
||||||
|
@ -323,7 +323,7 @@ template<typename OtherDerived>
|
|||||||
inline const typename MatrixBase<Derived>::template ProductReturnType<OtherDerived>::Type
|
inline const typename MatrixBase<Derived>::template ProductReturnType<OtherDerived>::Type
|
||||||
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||||
{
|
{
|
||||||
assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags) );
|
assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags&ArrayBit) );
|
||||||
return typename ProductReturnType<OtherDerived>::Type(derived(), other.derived());
|
return typename ProductReturnType<OtherDerived>::Type(derived(), other.derived());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user