* replace compile-time-if by meta-selector in Assign.h

as it speed up compilation.
* fix minor typo introduced in the previous commit
This commit is contained in:
Gael Guennebaud 2008-05-31 14:42:07 +00:00
parent e2ac5d244e
commit f5e599e489
4 changed files with 151 additions and 118 deletions

View File

@ -98,13 +98,16 @@ struct ei_matrix_assignment_packet_unroller<Derived1, Derived2, Dynamic>
{ ei_internal_assert(false && "ei_matrix_assignment_packet_unroller"); } { ei_internal_assert(false && "ei_matrix_assignment_packet_unroller"); }
}; };
//----
template <typename Derived, typename OtherDerived, template <typename Derived, typename OtherDerived,
bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit) bool Vectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & VectorizableBit)
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)) && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit))
&& ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit) && ( (int(Derived::Flags) & int(OtherDerived::Flags) & Like1DArrayBit)
||((int(Derived::Flags)&RowMajorBit) ||((int(Derived::Flags)&RowMajorBit)
? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0) ? int(Derived::ColsAtCompileTime)!=Dynamic && (int(Derived::ColsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)
: int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) )> : int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits<typename Derived::Scalar>::size==0)) ),
bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT>
struct ei_assignment_impl; struct ei_assignment_impl;
template<typename Derived> template<typename Derived>
@ -112,141 +115,176 @@ template<typename OtherDerived>
inline Derived& MatrixBase<Derived> inline Derived& MatrixBase<Derived>
::lazyAssign(const MatrixBase<OtherDerived>& other) ::lazyAssign(const MatrixBase<OtherDerived>& other)
{ {
ei_assignment_impl<Derived, OtherDerived>::execute(derived(),other.derived()); ei_assert(rows() == other.rows() && cols() == other.cols());
ei_assignment_impl<Derived, OtherDerived>::run(derived(),other.derived());
return derived(); return derived();
} }
template<typename Derived, typename OtherDerived,
bool EvalBeforeAssigning = (OtherDerived::Flags & EvalBeforeAssigningBit),
bool NeedToTranspose = Derived::IsVectorAtCompileTime
&& OtherDerived::IsVectorAtCompileTime
&& (int)Derived::RowsAtCompileTime != (int)OtherDerived::RowsAtCompileTime
&& (int)Derived::ColsAtCompileTime != (int)OtherDerived::ColsAtCompileTime>
struct ei_assign_selector;
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,true,true> {
static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
};
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,true,false> {
static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
};
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,false,true> {
static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
};
template<typename Derived, typename OtherDerived>
struct ei_assign_selector<Derived,OtherDerived,false,false> {
static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
};
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
inline Derived& MatrixBase<Derived> inline Derived& MatrixBase<Derived>
::operator=(const MatrixBase<OtherDerived>& other) ::operator=(const MatrixBase<OtherDerived>& other)
{ {
const bool need_to_transpose = Derived::IsVectorAtCompileTime return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
&& OtherDerived::IsVectorAtCompileTime
&& (int)Derived::RowsAtCompileTime != (int)OtherDerived::RowsAtCompileTime
&& (int)Derived::ColsAtCompileTime != (int)OtherDerived::ColsAtCompileTime;
if(OtherDerived::Flags & EvalBeforeAssigningBit)
{
if(need_to_transpose)
return lazyAssign(other.transpose().eval());
else
return lazyAssign(other.eval());
}
else
{
if(need_to_transpose)
return lazyAssign(other.transpose());
else
return lazyAssign(other.derived());
}
} }
//----
template <typename Derived, typename OtherDerived> template <typename Derived, typename OtherDerived>
struct ei_assignment_impl<Derived, OtherDerived, false> struct ei_assignment_impl<Derived, OtherDerived, false, true> // no vec + unrolling
{ {
static void execute(Derived & dst, const OtherDerived & src) static void run(Derived & dst, const OtherDerived & src)
{ {
const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT; ei_matrix_assignment_unroller
ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); <Derived, OtherDerived, int(Derived::SizeAtCompileTime)
if(unroll) >::run(dst.derived(), src.derived());
{
ei_matrix_assignment_unroller
<Derived, OtherDerived,
unroll ? int(Derived::SizeAtCompileTime) : Dynamic
>::run(dst.derived(), src.derived());
}
else
{
if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
else
{
// traverse in row-major order
// in order to allow the compiler to unroll the inner loop
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
}
} }
}; };
template <typename Derived, typename OtherDerived> template <typename Derived, typename OtherDerived>
struct ei_assignment_impl<Derived, OtherDerived, true> struct ei_assignment_impl<Derived, OtherDerived, false, false> // no vec + no unrolling + col major order
{ {
static void execute(Derived & dst, const OtherDerived & src) static void run(Derived & dst, const OtherDerived & src)
{ {
const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT; if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic)
if(unroll)
{ {
ei_matrix_assignment_packet_unroller for(int j = 0; j < dst.cols(); j++)
<Derived, OtherDerived, for(int i = 0; i < dst.rows(); i++)
unroll && int(Derived::SizeAtCompileTime)>=int(ei_packet_traits<typename Derived::Scalar>::size) dst.coeffRef(i, j) = src.coeff(i, j);
? int(Derived::SizeAtCompileTime)-int(ei_packet_traits<typename Derived::Scalar>::size)
: Dynamic>::run(dst.const_cast_derived(), src.derived());
} }
else else
{ {
if(OtherDerived::Flags&RowMajorBit) // traverse in row-major order
{ // in order to allow the compiler to unroll the inner loop
if ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit) for(int i = 0; i < dst.rows(); i++)
&& (Derived::ColsAtCompileTime==Dynamic for(int j = 0; j < dst.cols(); j++)
|| Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0)) dst.coeffRef(i, j) = src.coeff(i, j);
{
const int size = dst.rows() * dst.cols();
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
int index = 0;
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
{
// FIXME the following is not really efficient
int i = index/dst.cols();
int j = index%dst.cols();
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
}
for(int i = alignedSize/dst.cols(); i < dst.rows(); i++)
for(int j = alignedSize%dst.cols(); j < dst.cols(); j++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
else
{
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
}
}
else
{
if ((Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
&& ( Derived::RowsAtCompileTime==Dynamic
|| Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))
{
const int size = dst.rows() * dst.cols();
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
int index = 0;
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
{
// FIXME the following is not really efficient
int i = index%dst.rows();
int j = index/dst.rows();
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
}
for(int j = alignedSize/dst.rows(); j < dst.cols(); j++)
for(int i = alignedSize%dst.rows(); i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
else
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
}
}
} }
} }
}; };
//----
template <typename Derived, typename OtherDerived>
struct ei_assignment_impl<Derived, OtherDerived, true, true> // vec + unrolling
{
static void run(Derived & dst, const OtherDerived & src)
{
ei_matrix_assignment_packet_unroller
<Derived, OtherDerived,
int(Derived::SizeAtCompileTime)-int(ei_packet_traits<typename Derived::Scalar>::size)
>::run(dst.const_cast_derived(), src.derived());
}
};
template <typename Derived, typename OtherDerived,
bool RowMajor = OtherDerived::Flags&RowMajorBit,
bool Complex1DArray = RowMajor
? ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
&& ( Derived::ColsAtCompileTime==Dynamic
|| Derived::ColsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0) )
: ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit)
&& ( Derived::RowsAtCompileTime==Dynamic
|| Derived::RowsAtCompileTime%ei_packet_traits<typename Derived::Scalar>::size!=0))>
struct ei_packet_assignment_seclector;
template <typename Derived, typename OtherDerived>
struct ei_assignment_impl<Derived, OtherDerived, true, false> // vec + no-unrolling
{
static void run(Derived & dst, const OtherDerived & src)
{
ei_packet_assignment_seclector<Derived,OtherDerived>::run(dst,src);
}
};
template <typename Derived, typename OtherDerived>
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, true> // row-major + complex 1D array
{
static void run(Derived & dst, const OtherDerived & src)
{
const int size = dst.rows() * dst.cols();
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)
* ei_packet_traits<typename Derived::Scalar>::size;
int index = 0;
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
{
// FIXME the following is not really efficient
int i = index/dst.cols();
int j = index%dst.cols();
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
}
for(int i = alignedSize/dst.cols(); i < dst.rows(); i++)
for(int j = alignedSize%dst.cols(); j < dst.cols(); j++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
};
template <typename Derived, typename OtherDerived>
struct ei_packet_assignment_seclector<Derived, OtherDerived, true, false> // row-major + normal
{
static void run(Derived & dst, const OtherDerived & src)
{
for(int i = 0; i < dst.rows(); i++)
for(int j = 0; j < dst.cols(); j+=ei_packet_traits<typename Derived::Scalar>::size)
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
}
};
template <typename Derived, typename OtherDerived>
struct ei_packet_assignment_seclector<Derived, OtherDerived, false, true> // col-major + complex 1D array like
{
static void run(Derived & dst, const OtherDerived & src)
{
const int size = dst.rows() * dst.cols();
const int alignedSize = (size/ei_packet_traits<typename Derived::Scalar>::size)*ei_packet_traits<typename Derived::Scalar>::size;
int index = 0;
for ( ; index<alignedSize ; index+=ei_packet_traits<typename Derived::Scalar>::size)
{
// FIXME the following is not really efficient
int i = index%dst.rows();
int j = index/dst.rows();
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
}
for(int j = alignedSize/dst.rows(); j < dst.cols(); j++)
for(int i = alignedSize%dst.rows(); i < dst.rows(); i++)
dst.coeffRef(i, j) = src.coeff(i, j);
}
};
template <typename Derived, typename OtherDerived>
struct ei_packet_assignment_seclector<Derived, OtherDerived, false, false> // col-major + normal
{
static void run(Derived & dst, const OtherDerived & src)
{
for(int j = 0; j < dst.cols(); j++)
for(int i = 0; i < dst.rows(); i+=ei_packet_traits<typename Derived::Scalar>::size)
dst.template writePacketCoeff<Aligned>(i, j, src.template packetCoeff<Aligned>(i, j));
}
};
#endif // EIGEN_ASSIGN_H #endif // EIGEN_ASSIGN_H

View File

@ -595,11 +595,6 @@ template<typename Derived> class MatrixBase : public ArrayBase<Derived>
const QR<typename ei_eval<Derived>::type> qr() const; const QR<typename ei_eval<Derived>::type> qr() const;
//@} //@}
#ifdef EIGEN_MATRIX_CUSTOM_ADDONS_FILE
#include EIGEN_MATRIX_CUSTOM_ADDONS_FILE
#endif
}; };
#endif // EIGEN_MATRIXBASE_H #endif // EIGEN_MATRIXBASE_H

View File

@ -323,7 +323,7 @@ template<typename OtherDerived>
inline const typename MatrixBase<Derived>::template ProductReturnType<OtherDerived>::Type inline const typename MatrixBase<Derived>::template ProductReturnType<OtherDerived>::Type
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{ {
assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags) ); assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags&ArrayBit) );
return typename ProductReturnType<OtherDerived>::Type(derived(), other.derived()); return typename ProductReturnType<OtherDerived>::Type(derived(), other.derived());
} }