From e5bc9526f16590288edbc3e5fd8837ea81654942 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 10 Jul 2010 22:53:27 +0200 Subject: [PATCH] * generalize rowmajor by vector * fix weird compilation error when constructing a matrix with a row by matrix product --- Eigen/src/Core/Product.h | 13 ++++-- Eigen/src/Core/SolveTriangular.h | 9 ++-- Eigen/src/Core/products/GeneralMatrixVector.h | 45 ++++++++++--------- .../Core/products/TriangularMatrixVector.h | 8 ++-- Eigen/src/Core/util/BlasUtil.h | 5 ++- test/product_large.cpp | 11 +++++ 6 files changed, 57 insertions(+), 34 deletions(-) diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index ca30c4dce..139132c6b 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -324,6 +324,7 @@ template<> struct ei_gemv_selector typedef typename ProductType::ActualRhsType ActualRhsType; typedef typename ProductType::LhsBlasTraits LhsBlasTraits; typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef Map, Aligned> MappedDest; ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); @@ -342,7 +343,7 @@ template<> struct ei_gemv_selector else { actualDest = ei_aligned_stack_new(Scalar,dest.size()); - Map(actualDest, dest.size()) = dest; + MappedDest(actualDest, dest.size()) = dest; } ei_cache_friendly_product_colmajor_times_vector @@ -353,7 +354,7 @@ template<> struct ei_gemv_selector if (!EvalToDest) { - dest = Map(actualDest, dest.size()); + dest = MappedDest(actualDest, dest.size()); ei_aligned_stack_delete(Scalar, actualDest, dest.size()); } } @@ -365,6 +366,7 @@ template<> struct ei_gemv_selector static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) { typedef typename ProductType::Scalar Scalar; + typedef typename ProductType::Index Index; typedef typename ProductType::ActualLhsType ActualLhsType; typedef typename ProductType::ActualRhsType ActualRhsType; typedef typename ProductType::_ActualRhsType _ActualRhsType; @@ -394,9 +396,12 @@ template<> struct ei_gemv_selector } ei_cache_friendly_product_rowmajor_times_vector - ( + ( + actualLhs.rows(), actualLhs.cols(), &actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(), - rhs_data, prod.rhs().size(), dest, actualAlpha); + rhs_data, 1, + &dest.coeffRef(0,0), dest.innerStride(), + actualAlpha); if (!DirectlyUseRhs) ei_aligned_stack_delete(Scalar, rhs_data, prod.rhs().size()); } diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 310e262d8..90ce2a802 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -80,12 +80,13 @@ struct ei_triangular_solver_selector target(other,startRow,actualPanelWidth); - ei_cache_friendly_product_rowmajor_times_vector( + ei_cache_friendly_product_rowmajor_times_vector( + actualPanelWidth, r, &(actualLhs.const_cast_derived().coeffRef(startRow,startCol)), actualLhs.outerStride(), - &(other.coeffRef(startCol)), r, - target, Scalar(-1)); + &(other.coeffRef(startCol)), other.innerStride(), + &other.coeffRef(startRow), other.innerStride(), + Scalar(-1)); } for(Index k=0; k +template static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( + Index rows, Index cols, const Scalar* lhs, Index lhsStride, - const Scalar* rhs, Index rhsSize, - ResType& res, + const Scalar* rhs, Index rhsIncr, + Scalar* res, Index resIncr, Scalar alpha) { + ei_internal_assert(rhsIncr==1); #ifdef _EIGEN_ACCUMULATE_PACKETS #error _EIGEN_ACCUMULATE_PACKETS has already been defined #endif @@ -291,22 +293,22 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( const Index peels = 2; const Index PacketAlignedMask = PacketSize-1; const Index PeelAlignedMask = PacketSize*peels-1; - const Index size = rhsSize; + const Index depth = cols; // How many coeffs of the result do we have to skip to be aligned. // Here we assume data are at least aligned on the base scalar type // if that's not the case then vectorization is discarded, see below. - Index alignedStart = ei_first_aligned(rhs, size); - Index alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0; + Index alignedStart = ei_first_aligned(rhs, depth); + Index alignedSize = PacketSize>1 ? alignedStart + ((depth-alignedStart) & ~PacketAlignedMask) : 0; const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; const Index alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0; Index alignmentPattern = alignmentStep==0 ? AllAligned - : alignmentStep==(PacketSize/2) ? EvenAligned - : FirstAligned; + : alignmentStep==(PacketSize/2) ? EvenAligned + : FirstAligned; // we cannot assume the first element is aligned because of sub-matrices - const Index lhsAlignmentOffset = ei_first_aligned(lhs,size); + const Index lhsAlignmentOffset = ei_first_aligned(lhs,depth); // find how many rows do we have to skip to be aligned with rhs (if possible) Index skipRows = 0; @@ -318,7 +320,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( } else if (PacketSize>1) { - ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size= res.size()) - || PacketSize > rhsSize + || (skipRows + rowsAtOnce >= rows) + || PacketSize > depth || (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0); } else if(Vectorizable) { alignedStart = 0; - alignedSize = size; + alignedSize = depth; alignmentPattern = AllAligned; } Index offset1 = (FirstAligned && alignmentStep==1?3:1); Index offset3 = (FirstAligned && alignmentStep==1?1:3); - Index rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; + Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; for (Index i=skipRows; i0) { Index s = IsLower ? 0 : pi + actualPanelWidth; - Block target(res,pi,0,actualPanelWidth,1); - ei_cache_friendly_product_rowmajor_times_vector( + ei_cache_friendly_product_rowmajor_times_vector( + actualPanelWidth, r, &(lhs.const_cast_derived().coeffRef(pi,s)), lhs.outerStride(), - &(rhs.const_cast_derived().coeffRef(s)), r, - target, alpha); + &(rhs.const_cast_derived().coeffRef(s)), 1, + &res.coeffRef(pi,0), res.innerStride(), alpha); } } } diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 12ac6994f..e53311100 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -49,9 +49,10 @@ template +template static void ei_cache_friendly_product_rowmajor_times_vector( - const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsSize, ResType& res, Scalar alpha); + Index rows, Index Cols, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsIncr, + Scalar* res, Index resIncr, Scalar alpha); template struct ei_conj_helper { diff --git a/test/product_large.cpp b/test/product_large.cpp index 2d36c5a92..2c5523913 100644 --- a/test/product_large.cpp +++ b/test/product_large.cpp @@ -64,5 +64,16 @@ void test_product_large() // only makes sure it compiles fine computeProductBlockingSizes(k1,m1,n1); } + + { + // test regression in row-vector by matrix (bad Map type) + MatrixXf mat1(10,32); mat1.setRandom(); + MatrixXf mat2(32,32); mat2.setRandom(); + MatrixXf r1 = mat1.row(2)*mat2.transpose(); + VERIFY_IS_APPROX(r1, (mat1.row(2)*mat2.transpose()).eval()); + + MatrixXf r2 = mat1.row(2)*mat2; + VERIFY_IS_APPROX(r2, (mat1.row(2)*mat2).eval()); + } #endif }