diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 4c0fc7f63..57d5d3c38 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -342,16 +342,19 @@ class GeneralProduct */ namespace internal { +#ifndef EIGEN_TEST_EVALUATORS template struct traits > : traits, Lhs, Rhs> > {}; +#endif template struct gemv_selector; } // end namespace internal +#ifndef EIGEN_TEST_EVALUATORS template class GeneralProduct : public ProductBase, Lhs, Rhs> @@ -378,24 +381,10 @@ class GeneralProduct bool(internal::blas_traits::HasUsableDirectAccess)>::run(*this, dst, alpha); } }; +#endif namespace internal { -// The vector is on the left => transposition -template -struct gemv_selector -{ - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) - { - Transpose destT(dest); - enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; - gemv_selector - ::run(GeneralProduct,Transpose, GemvProduct> - (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha); - } -}; - template struct gemv_static_vector_if; template @@ -432,6 +421,23 @@ struct gemv_static_vector_if #endif }; +#ifndef EIGEN_TEST_EVALUATORS + +// The vector is on the left => transposition +template +struct gemv_selector +{ + template + static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + { + Transpose destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; + gemv_selector + ::run(GeneralProduct,Transpose, GemvProduct> + (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha); + } +}; + template<> struct gemv_selector { template @@ -582,6 +588,178 @@ template<> struct gemv_selector } }; +#else // EIGEN_TEST_EVALUATORS + +// The vector is on the left => transposition +template +struct gemv_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + Transpose destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; + gemv_selector + ::run(rhs.transpose(), lhs.transpose(), destT, alpha); + } +}; + +template<> struct gemv_selector +{ + template + static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + + typedef Map, Aligned> MappedDest; + + ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); + ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... + EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1, + ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), + MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal + }; + + gemv_static_vector_if static_dest; + + bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + + RhsScalar compatibleAlpha = get_factor::run(actualAlpha); + + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + evalToDest ? dest.data() : static_dest.data()); + + if(!evalToDest) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if(!alphaIsCompatible) + { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } + else + MappedDest(actualDestPtr, dest.size()) = dest; + } + + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + actualLhs.data(), actualLhs.outerStride(), + actualRhs.data(), actualRhs.innerStride(), + actualDestPtr, 1, + compatibleAlpha); + + if (!evalToDest) + { + if(!alphaIsCompatible) + dest += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } + } +}; + +template<> struct gemv_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typedef typename Dest::Index Index; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type ActualRhsTypeCleaned; + + typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 + }; + + gemv_static_vector_if static_rhs; + + ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), + DirectlyUseRhs ? const_cast(actualRhs.data()) : static_rhs.data()); + + if(!DirectlyUseRhs) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + int size = actualRhs.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + Map(actualRhsPtr, actualRhs.size()) = actualRhs; + } + + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + actualLhs.data(), actualLhs.outerStride(), + actualRhsPtr, 1, + dest.data(), dest.innerStride(), + actualAlpha); + } +}; + +template<> struct gemv_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typedef typename Dest::Index Index; + // TODO makes sure dest is sequentially stored in memory, otherwise use a temp + const Index size = rhs.rows(); + for(Index k=0; k struct gemv_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typedef typename Dest::Index Index; + // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp + const Index rows = dest.rows(); + for(Index i=0; i }; #ifndef EIGEN_TEST_EVALUATORS + // here we need to overload the nested rule for products // such that the nested type is a const reference to a plain matrix namespace internal { diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 99c48ae52..5f9cf69a2 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -307,7 +307,7 @@ struct generic_product_impl internal::gemv_selector::HasUsableDirectAccess) - >::run(GeneralProduct(lhs,rhs), dst, alpha); + >::run(lhs, rhs, dst, alpha); } };