From 4cb9d0f9435e448953b673f03fda0a435570a02d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 1 Feb 2011 11:41:52 +0100 Subject: [PATCH] notify the creation of manual temporaries --- .../Core/products/SelfadjointMatrixVector.h | 102 ++++++++++++------ 1 file changed, 71 insertions(+), 31 deletions(-) diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index 7d0f6c975..1d433e16d 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -37,7 +37,8 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector( Index size, const Scalar* lhs, Index lhsStride, const Scalar* _rhs, Index rhsIncr, - Scalar* res, Scalar alpha) + Scalar* res, + Scalar alpha) { typedef typename packet_traits::type Packet; typedef typename NumTraits::Real RealScalar; @@ -58,6 +59,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector( Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha; + // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed. // if the rhs is not sequentially stored in memory we copy it to a temporary buffer, // this is because we need to extract packets const Scalar* EIGEN_RESTRICT rhs = _rhs; @@ -188,9 +190,13 @@ struct SelfadjointProductMatrix SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - template void scaleAndAddTo(Dest& dst, Scalar alpha) const + template void scaleAndAddTo(Dest& dest, Scalar alpha) const { - eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); + typedef typename Dest::Scalar ResScalar; + typedef typename Base::RhsScalar RhsScalar; + typedef Map, Aligned> MappedDest; + + eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols()); const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); @@ -198,16 +204,66 @@ struct SelfadjointProductMatrix Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); - eigen_assert(dst.innerStride()==1 && "not implemented yet"); - + enum { + EvalToDest = (Dest::InnerStrideAtCompileTime==1), + UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1) + }; + + internal::gemv_static_vector_if static_dest; + internal::gemv_static_vector_if static_rhs; + + bool freeDestPtr = false; + ResScalar* actualDestPtr; + if(EvalToDest) + actualDestPtr = dest.data(); + else + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if((actualDestPtr=static_dest.data())==0) + { + freeDestPtr = true; + actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size()); + } + MappedDest(actualDestPtr, dest.size()) = dest; + } + + bool freeRhsPtr = false; + RhsScalar* actualRhsPtr; + if(UseRhs) + actualRhsPtr = const_cast(rhs.data()); + else + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = rhs.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if((actualRhsPtr=static_rhs.data())==0) + { + freeRhsPtr = true; + actualRhsPtr = ei_aligned_stack_new(RhsScalar,rhs.size()); + } + Map(actualRhsPtr, rhs.size()) = rhs; + } + + internal::product_selfadjoint_vector::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)> ( - lhs.rows(), // size - &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info - &rhs.coeffRef(0), rhs.innerStride(), // rhs info - &dst.coeffRef(0), // result info - actualAlpha // scale factor + lhs.rows(), // size + &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info + actualRhsPtr, 1, // rhs info + actualDestPtr, // result info + actualAlpha // scale factor ); + + if(!EvalToDest) + { + dest = MappedDest(actualDestPtr, dest.size()); + if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size()); + } + if(freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, rhs.size()); } }; @@ -230,28 +286,12 @@ struct SelfadjointProductMatrix SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} - template void scaleAndAddTo(Dest& dst, Scalar alpha) const + template void scaleAndAddTo(Dest& dest, Scalar alpha) const { - eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols()); - - const ActualLhsType lhs = LhsBlasTraits::extract(m_lhs); - const ActualRhsType rhs = RhsBlasTraits::extract(m_rhs); - - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) - * RhsBlasTraits::extractScalarFactor(m_rhs); - - eigen_assert(dst.innerStride()==1 && "not implemented yet"); - - // transpose the product - internal::product_selfadjoint_vector::Flags&RowMajorBit) ? ColMajor : RowMajor, int(RhsUpLo)==Upper ? Lower : Upper, - bool(RhsBlasTraits::NeedToConjugate), bool(LhsBlasTraits::NeedToConjugate)> - ( - rhs.rows(), // size - &rhs.coeffRef(0,0), rhs.outerStride(), // lhs info - &lhs.coeffRef(0), lhs.innerStride(), // rhs info - &dst.coeffRef(0), // result info - actualAlpha // scale factor - ); + // let's simply transpose the product + Transpose destT(dest); + SelfadjointProductMatrix, int(RhsUpLo)==Upper ? Lower : Upper, false, + Transpose, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha); } };