From f5e599e48902826bea128c3bdc651bbe2a5fad53 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 31 May 2008 14:42:07 +0000 Subject: [PATCH] * replace compile-time-if by meta-selector in Assign.h as it speeds up compilation. * fix minor typo introduced in the previous commit --- Eigen/src/Core/ArrayBase.h | 2 +- Eigen/src/Core/Assign.h | 260 +++++++++++++++++++++--------------- Eigen/src/Core/MatrixBase.h | 5 - Eigen/src/Core/Product.h | 2 +- 4 files changed, 151 insertions(+), 118 deletions(-) diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 222c0256a..874f0754a 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -42,4 +42,4 @@ public: } }; -#endif // EIGEN_ARRAYBASE_H \ No newline at end of file +#endif // EIGEN_ARRAYBASE_H diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index fa64cb790..5ab46807b 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -98,13 +98,16 @@ struct ei_matrix_assignment_packet_unroller { ei_internal_assert(false && "ei_matrix_assignment_packet_unroller"); } }; +//---- + template ::size==0) - : int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits::size==0)) )> + : int(Derived::RowsAtCompileTime)!=Dynamic && (int(Derived::RowsAtCompileTime)%ei_packet_traits::size==0)) ), +bool Unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT> struct ei_assignment_impl; template @@ -112,141 +115,176 @@ template inline Derived& MatrixBase ::lazyAssign(const MatrixBase& other) { - ei_assignment_impl::execute(derived(),other.derived()); + ei_assert(rows() == other.rows() && cols() == other.cols()); + ei_assignment_impl::run(derived(),other.derived()); return derived(); } +template +struct ei_assign_selector; + +template +struct ei_assign_selector { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } +}; +template +struct 
ei_assign_selector { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); } +}; +template +struct ei_assign_selector { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); } +}; +template +struct ei_assign_selector { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } +}; + template template inline Derived& MatrixBase ::operator=(const MatrixBase& other) { - const bool need_to_transpose = Derived::IsVectorAtCompileTime - && OtherDerived::IsVectorAtCompileTime - && (int)Derived::RowsAtCompileTime != (int)OtherDerived::RowsAtCompileTime - && (int)Derived::ColsAtCompileTime != (int)OtherDerived::ColsAtCompileTime; - if(OtherDerived::Flags & EvalBeforeAssigningBit) - { - if(need_to_transpose) - return lazyAssign(other.transpose().eval()); - else - return lazyAssign(other.eval()); - } - else - { - if(need_to_transpose) - return lazyAssign(other.transpose()); - else - return lazyAssign(other.derived()); - } + return ei_assign_selector::run(derived(), other.derived()); } +//---- + template -struct ei_assignment_impl +struct ei_assignment_impl // no vec + unrolling { - static void execute(Derived & dst, const OtherDerived & src) + static void run(Derived & dst, const OtherDerived & src) { - const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - ei_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - if(unroll) - { - ei_matrix_assignment_unroller - ::run(dst.derived(), src.derived()); - } - else - { - if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic) - { - for(int j = 0; j < dst.cols(); j++) - for(int i = 0; i < dst.rows(); i++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - else - { - // traverse in row-major order - // in order to allow the compiler to unroll the inner loop - for(int i = 0; i < dst.rows(); i++) - for(int j = 
0; j < dst.cols(); j++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - } + ei_matrix_assignment_unroller + ::run(dst.derived(), src.derived()); } }; template -struct ei_assignment_impl +struct ei_assignment_impl // no vec + no unrolling + col major order { - static void execute(Derived & dst, const OtherDerived & src) + static void run(Derived & dst, const OtherDerived & src) { - const bool unroll = Derived::SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT; - if(unroll) + if(Derived::ColsAtCompileTime == Dynamic || Derived::RowsAtCompileTime != Dynamic) { - ei_matrix_assignment_packet_unroller - =int(ei_packet_traits::size) - ? int(Derived::SizeAtCompileTime)-int(ei_packet_traits::size) - : Dynamic>::run(dst.const_cast_derived(), src.derived()); + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); } else { - if(OtherDerived::Flags&RowMajorBit) - { - if ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit) - && (Derived::ColsAtCompileTime==Dynamic - || Derived::ColsAtCompileTime%ei_packet_traits::size!=0)) - { - const int size = dst.rows() * dst.cols(); - const int alignedSize = (size/ei_packet_traits::size)*ei_packet_traits::size; - int index = 0; - for ( ; index::size) - { - // FIXME the following is not really efficient - int i = index/dst.cols(); - int j = index%dst.cols(); - dst.template writePacketCoeff(i, j, src.template packetCoeff(i, j)); - } - for(int i = alignedSize/dst.cols(); i < dst.rows(); i++) - for(int j = alignedSize%dst.cols(); j < dst.cols(); j++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - else - { - for(int i = 0; i < dst.rows(); i++) - for(int j = 0; j < dst.cols(); j+=ei_packet_traits::size) - dst.template writePacketCoeff(i, j, src.template packetCoeff(i, j)); - } - } - else - { - if ((Derived::Flags & OtherDerived::Flags & Like1DArrayBit) - && ( Derived::RowsAtCompileTime==Dynamic - || Derived::RowsAtCompileTime%ei_packet_traits::size!=0)) - { - 
const int size = dst.rows() * dst.cols(); - const int alignedSize = (size/ei_packet_traits::size)*ei_packet_traits::size; - int index = 0; - for ( ; index::size) - { - // FIXME the following is not really efficient - int i = index%dst.rows(); - int j = index/dst.rows(); - dst.template writePacketCoeff(i, j, src.template packetCoeff(i, j)); - } - for(int j = alignedSize/dst.rows(); j < dst.cols(); j++) - for(int i = alignedSize%dst.rows(); i < dst.rows(); i++) - dst.coeffRef(i, j) = src.coeff(i, j); - } - else - { - for(int j = 0; j < dst.cols(); j++) - for(int i = 0; i < dst.rows(); i+=ei_packet_traits::size) - dst.template writePacketCoeff(i, j, src.template packetCoeff(i, j)); - } - } + // traverse in row-major order + // in order to allow the compiler to unroll the inner loop + for(int i = 0; i < dst.rows(); i++) + for(int j = 0; j < dst.cols(); j++) + dst.coeffRef(i, j) = src.coeff(i, j); } } }; +//---- + +template +struct ei_assignment_impl // vec + unrolling +{ + static void run(Derived & dst, const OtherDerived & src) + { + ei_matrix_assignment_packet_unroller + ::size) + >::run(dst.const_cast_derived(), src.derived()); + } +}; + +template ::size!=0) ) + : ( (Derived::Flags & OtherDerived::Flags & Like1DArrayBit) + && ( Derived::RowsAtCompileTime==Dynamic + || Derived::RowsAtCompileTime%ei_packet_traits::size!=0))> +struct ei_packet_assignment_seclector; + +template +struct ei_assignment_impl // vec + no-unrolling +{ + static void run(Derived & dst, const OtherDerived & src) + { + ei_packet_assignment_seclector::run(dst,src); + } +}; + +template +struct ei_packet_assignment_seclector // row-major + complex 1D array +{ + static void run(Derived & dst, const OtherDerived & src) + { + const int size = dst.rows() * dst.cols(); + const int alignedSize = (size/ei_packet_traits::size) + * ei_packet_traits::size; + int index = 0; + for ( ; index::size) + { + // FIXME the following is not really efficient + int i = index/dst.cols(); + int j = index%dst.cols(); + 
dst.template writePacketCoeff(i, j, src.template packetCoeff(i, j)); + } + for(int i = alignedSize/dst.cols(); i < dst.rows(); i++) + for(int j = alignedSize%dst.cols(); j < dst.cols(); j++) + dst.coeffRef(i, j) = src.coeff(i, j); + } +}; + +template +struct ei_packet_assignment_seclector // row-major + normal +{ + static void run(Derived & dst, const OtherDerived & src) + { + for(int i = 0; i < dst.rows(); i++) + for(int j = 0; j < dst.cols(); j+=ei_packet_traits::size) + dst.template writePacketCoeff(i, j, src.template packetCoeff(i, j)); + } +}; + +template +struct ei_packet_assignment_seclector // col-major + complex 1D array like +{ + static void run(Derived & dst, const OtherDerived & src) + { + const int size = dst.rows() * dst.cols(); + const int alignedSize = (size/ei_packet_traits::size)*ei_packet_traits::size; + int index = 0; + for ( ; index::size) + { + // FIXME the following is not really efficient + int i = index%dst.rows(); + int j = index/dst.rows(); + dst.template writePacketCoeff(i, j, src.template packetCoeff(i, j)); + } + for(int j = alignedSize/dst.rows(); j < dst.cols(); j++) + for(int i = alignedSize%dst.rows(); i < dst.rows(); i++) + dst.coeffRef(i, j) = src.coeff(i, j); + } +}; + +template +struct ei_packet_assignment_seclector // col-major + normal +{ + static void run(Derived & dst, const OtherDerived & src) + { + for(int j = 0; j < dst.cols(); j++) + for(int i = 0; i < dst.rows(); i+=ei_packet_traits::size) + dst.template writePacketCoeff(i, j, src.template packetCoeff(i, j)); + } +}; + #endif // EIGEN_ASSIGN_H diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index c39b2d93e..62e8e1057 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -595,11 +595,6 @@ template class MatrixBase : public ArrayBase const QR::type> qr() const; //@} - - #ifdef EIGEN_MATRIX_CUSTOM_ADDONS_FILE - #include EIGEN_MATRIX_CUSTOM_ADDONS_FILE - #endif - }; #endif // EIGEN_MATRIXBASE_H diff --git 
a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index fb0f732b5..2c0655955 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -323,7 +323,7 @@ template inline const typename MatrixBase::template ProductReturnType::Type MatrixBase::operator*(const MatrixBase &other) const { - assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags) ); + assert( (Derived::Flags&ArrayBit) == (OtherDerived::Flags&ArrayBit) ); return typename ProductReturnType::Type(derived(), other.derived()); }