From fb4a1519829eabef0699b297fa493c2c495631e5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 19 Jun 2008 23:00:51 +0000 Subject: [PATCH] * more cleaning in Product * make Matrix2f (and similar) vectorized using linear path * fix a couple of warnings and compilation issues with ICC and gcc 3.3/3.4 (cannot get Transform compiles with gcc 3.3/3.4, see the FIXME) --- Eigen/src/Core/Assign.h | 28 +++++---- Eigen/src/Core/DiagonalCoeffs.h | 4 +- Eigen/src/Core/Product.h | 101 +++++++++++++------------------- Eigen/src/Core/Redux.h | 16 ++--- Eigen/src/Core/util/Meta.h | 4 +- Eigen/src/Geometry/Cross.h | 4 +- Eigen/src/Geometry/Transform.h | 91 ++++++++++++++-------------- 7 files changed, 112 insertions(+), 136 deletions(-) diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 681b3d4ef..b0e885dfe 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -238,7 +238,7 @@ template struct ei_assign_impl { static void run(Derived1 &dst, const Derived2 &src) - { + { const bool rowMajor = int(Derived1::Flags)&RowMajorBit; const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime; const int outerSize = rowMajor ? dst.rows() : dst.cols(); @@ -268,7 +268,7 @@ struct ei_assign_impl const int row = rowMajor ? j : i; const int col = rowMajor ? i : j; dst.template writePacket(row, col, src.template packet(row, col)); - } + } } } }; @@ -351,23 +351,25 @@ struct ei_assign_impl::size; - const int alignedSize = (size/packetSize)*packetSize; - const bool rowMajor = Derived1::Flags&RowMajorBit; - const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime; - const int outerSize = rowMajor ? Derived1::RowsAtCompileTime : Derived1::ColsAtCompileTime; - int index = 0; + enum { + size = Derived1::SizeAtCompileTime, + packetSize = ei_packet_traits::size, + alignedSize = (int(size)/int(packetSize))*int(packetSize), + rowMajor = int(Derived1::Flags)&RowMajorBit, + innerSize = int(rowMajor) ? int(Derived1::ColsAtCompileTime) : int(Derived1::RowsAtCompileTime), + outerSize = int(rowMajor) ? int(Derived1::RowsAtCompileTime) : int(Derived1::ColsAtCompileTime) + }; // do the vectorizable part of the assignment ei_assign_innervec_CompleteUnrolling::run(dst, src); // now we must do the rest without vectorization. - const int k = alignedSize/innerSize; - const int i = alignedSize%innerSize; - + enum { + k = int(alignedSize)/int(innerSize), + i = int(alignedSize)%int(innerSize) + }; // do the remainder of the current row or col - ei_assign_novec_InnerUnrolling::run(dst, src, k); + ei_assign_novec_InnerUnrolling::run(dst, src, k); // do the remaining rows or cols for(int j = k+1; j < outerSize; j++) diff --git a/Eigen/src/Core/DiagonalCoeffs.h b/Eigen/src/Core/DiagonalCoeffs.h index 75469b4bf..b7d3ef475 100644 --- a/Eigen/src/Core/DiagonalCoeffs.h +++ b/Eigen/src/Core/DiagonalCoeffs.h @@ -101,8 +101,8 @@ template class DiagonalCoeffs * * \sa class DiagonalCoeffs */ template -DiagonalCoeffs -inline MatrixBase::diagonal() +inline DiagonalCoeffs +MatrixBase::diagonal() { return DiagonalCoeffs(derived()); } diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index f03ea4e8e..1f387af32 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -201,14 +201,21 @@ template class Product ei_assert(lhs.cols() == rhs.rows()); } - /** \internal */ - template - void _cacheFriendlyEval(DestDerived& res) const; - - /** \internal */ + /** \internal + * compute \a res += \c *this using the cache friendly product. + */ template void _cacheFriendlyEvalAndAdd(DestDerived& res) const; + /** \internal + * \returns whether it is worth it to use the cache friendly product. + */ + inline bool _useCacheFriendlyProduct() const { + return _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD; + } + private: inline int _rows() const { return m_lhs.rows(); } @@ -229,7 +236,7 @@ template class Product return res; } - template + template friend struct ei_cache_friendly_selector; protected: @@ -419,7 +426,10 @@ template inline Derived& MatrixBase::operator+=(const Flagged, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other) { - other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived()); + if (other._expression()._useCacheFriendlyProduct()) + other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived()); + else + lazyAssign(derived() + other._expression()); return derived(); } @@ -427,7 +437,15 @@ template template inline Derived& MatrixBase::lazyAssign(const Product& product) { - product._cacheFriendlyEval(derived()); + if (product._useCacheFriendlyProduct()) + { + setZero(); + product._cacheFriendlyEvalAndAdd(derived()); + } + else + { + lazyAssign >(product); + } return derived(); } @@ -472,61 +490,22 @@ template struct ei_product_copy_lhs >::ret type; }; -template +template template -inline void Product::_cacheFriendlyEval(DestDerived& res) const +inline void Product::_cacheFriendlyEvalAndAdd(DestDerived& res) const { - if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - ) - { - res.setZero(); - typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy; - typedef typename ei_unref::type _LhsCopy; - typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy; - typedef typename ei_unref::type _RhsCopy; - LhsCopy lhs(m_lhs); - RhsCopy rhs(m_rhs); - ei_cache_friendly_product( - _rows(), _cols(), lhs.cols(), - _LhsCopy::Flags&RowMajorBit, &(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(), - _RhsCopy::Flags&RowMajorBit, &(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(), - Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() - ); - } - else - { - res = Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy(); - } -} - -template -template -inline void Product::_cacheFriendlyEvalAndAdd(DestDerived& res) const -{ - if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD - ) - { - typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy; - typedef typename ei_unref::type _LhsCopy; - typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy; - typedef typename ei_unref::type _RhsCopy; - LhsCopy lhs(m_lhs); - RhsCopy rhs(m_rhs); - ei_cache_friendly_product( - _rows(), _cols(), lhs.cols(), - _LhsCopy::Flags&RowMajorBit, &(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(), - _RhsCopy::Flags&RowMajorBit, &(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(), - Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() - ); - } - else - { - res += Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy(); - } + typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy; + typedef typename ei_unref::type _LhsCopy; + typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy; + typedef typename ei_unref::type _RhsCopy; + LhsCopy lhs(m_lhs); + RhsCopy rhs(m_rhs); + ei_cache_friendly_product( + _rows(), _cols(), lhs.cols(), + _LhsCopy::Flags&RowMajorBit, &(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(), + _RhsCopy::Flags&RowMajorBit, &(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(), + Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() + ); } #endif // EIGEN_PRODUCT_H diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index e7db140c5..4b93e20fd 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -101,8 +101,8 @@ MatrixBase::redux(const BinaryOp& func) const * \sa trace() */ template -typename ei_traits::Scalar -inline MatrixBase::sum() const +inline typename ei_traits::Scalar +MatrixBase::sum() const { return this->redux(Eigen::ei_scalar_sum_op()); } @@ -114,8 +114,8 @@ inline MatrixBase::sum() const * \sa diagonal(), sum() */ template -typename ei_traits::Scalar -inline MatrixBase::trace() const +inline typename ei_traits::Scalar +MatrixBase::trace() const { return diagonal().sum(); } @@ -123,8 +123,8 @@ inline MatrixBase::trace() const /** \returns the minimum of all coefficients of *this */ template -typename ei_traits::Scalar -inline MatrixBase::minCoeff() const +inline typename ei_traits::Scalar +MatrixBase::minCoeff() const { return this->redux(Eigen::ei_scalar_min_op()); } @@ -132,8 +132,8 @@ inline MatrixBase::minCoeff() const /** \returns the maximum of all coefficients of *this */ template -typename ei_traits::Scalar -inline MatrixBase::maxCoeff() const +inline typename ei_traits::Scalar +MatrixBase::maxCoeff() const { return this->redux(Eigen::ei_scalar_max_op()); } diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 509b72cc0..078beb681 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -156,10 +156,10 @@ class ei_corrected_matrix_flags ? SuggestedFlags&RowMajorBit : Cols > 1 ? RowMajorBit : 0, is_big = MaxRows == Dynamic || MaxCols == Dynamic, - inner_size = row_major_bit ? Cols : Rows, + linear_size = Cols * Rows, packet_access_bit = ei_packet_traits::size > 1 - && (is_big || inner_size%ei_packet_traits::size==0) + && (is_big || linear_size%ei_packet_traits::size==0) ? PacketAccessBit : 0 }; diff --git a/Eigen/src/Geometry/Cross.h b/Eigen/src/Geometry/Cross.h index 1ee9a007c..61b630c2a 100644 --- a/Eigen/src/Geometry/Cross.h +++ b/Eigen/src/Geometry/Cross.h @@ -28,8 +28,8 @@ /** \returns the cross product of \c *this and \a other */ template template -typename ei_eval::type -inline MatrixBase::cross(const MatrixBase& other) const +inline typename ei_eval::type +MatrixBase::cross(const MatrixBase& other) const { // Note that there is no need for an expression here since the compiler // optimize such a small temporary very well (even within a complex expression) diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index b5c5b3a0d..0b5b2a3a0 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -62,6 +62,47 @@ protected: int OtherCols=Other::ColsAtCompileTime> struct ei_transform_product_impl; + // FIXME these specializations of ei_transform_product_impl does not work with gcc 3.3 and 3.4 because + // Dim depends on a template parameter. Replacing Dim by 3 (for the 3D case) works. + + // note that these specializations have to be defined here, + // otherwise some compilers (at least ICC and NVCC) complain about + // the use of Dim in the specialization parameters. + template + struct ei_transform_product_impl + { + typedef typename Transform::MatrixType MatrixType; + typedef typename ProductReturnType::Type ResultType; + static ResultType run(const Transform& tr, const Other& other) + { return tr.matrix() * other; } + }; + + template + struct ei_transform_product_impl + { + typedef typename Transform::MatrixType MatrixType; + typedef typename ProductReturnType::Type ResultType; + static ResultType run(const Transform& tr, const Other& other) + { return tr.matrix() * other; } + }; + + template + struct ei_transform_product_impl + { + typedef typename Transform::AffineMatrixRef MatrixType; + typedef const CwiseUnaryOp< + ei_scalar_multiple_op, + NestByValue, + NestByValue,Other>::Type >, + NestByValue::VectorRef> > > + > ResultType; + // FIXME shall we offer an optimized version when the last row is know to be 0,0...,0,1 ? + static ResultType run(const Transform& tr, const Other& other) + { return ((tr.affine().nestByValue() * other).nestByValue() + tr.translation().nestByValue()).nestByValue() + * (Scalar(1) / ( (tr.matrix().template block<1,Dim>(Dim,0) * other).coeff(0) + tr.matrix().coeff(Dim,Dim))); } + }; + public: /** Default constructor without initialization of the coefficients. */ @@ -103,13 +144,7 @@ public: inline VectorRef translation() { return m_matrix.template block(0,Dim); } template - struct TransformProductReturnType - { - typedef typename ei_transform_product_impl::ResultType Type; - }; - - template - const typename TransformProductReturnType::Type + const typename ei_transform_product_impl::ResultType operator * (const MatrixBase &other) const; /** Contatenates two transformations */ @@ -192,7 +227,7 @@ QMatrix Transform::toQMatrix(void) const template template -const typename Transform::template TransformProductReturnType::Type +const typename Transform::template ei_transform_product_impl::ResultType Transform::operator*(const MatrixBase &other) const { return ei_transform_product_impl::run(*this,other.derived()); @@ -373,44 +408,4 @@ Transform::fromPositionOrientationScale(const MatrixBase -template -struct Transform::ei_transform_product_impl -{ - typedef typename Transform::MatrixType MatrixType; - typedef typename ProductReturnType::Type ResultType; - static ResultType run(const Transform& tr, const Other& other) - { return tr.matrix() * other; } -}; - -template -template -struct Transform::ei_transform_product_impl -{ - typedef typename Transform::MatrixType MatrixType; - typedef typename ProductReturnType::Type ResultType; - static ResultType run(const Transform& tr, const Other& other) - { return tr.matrix() * other; } -}; - -template -template -struct Transform::ei_transform_product_impl -{ - typedef typename Transform::AffineMatrixRef MatrixType; - typedef const CwiseUnaryOp< - ei_scalar_multiple_op, - NestByValue, - NestByValue,Other>::Type >, - NestByValue::VectorRef> > > - > ResultType; - // FIXME shall we offer an optimized version when the last row is know to be 0,0...,0,1 ? - static ResultType run(const Transform& tr, const Other& other) - { return ((tr.affine().nestByValue() * other).nestByValue() + tr.translation().nestByValue()).nestByValue() - * (Scalar(1) / ( (tr.matrix().template block<1,Dim>(Dim,0) * other).coeff(0) + tr.matrix().coeff(Dim,Dim))); } -}; - #endif // EIGEN_TRANSFORM_H