diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index f62ecfcbe..0f16cd8e3 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -224,50 +224,65 @@ template<> struct gemv_dense_selector // on, the other hand it is good for the cache to pack the vector anyways... EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), - MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal + MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal }; - gemv_static_vector_if static_dest; - - const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); - const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; - - RhsScalar compatibleAlpha = get_factor::run(actualAlpha); - - ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), - evalToDest ? dest.data() : static_dest.data()); - - if(!evalToDest) - { - #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - Index size = dest.size(); - EIGEN_DENSE_STORAGE_CTOR_PLUGIN - #endif - if(!alphaIsCompatible) - { - MappedDest(actualDestPtr, dest.size()).setZero(); - compatibleAlpha = RhsScalar(1); - } - else - MappedDest(actualDestPtr, dest.size()) = dest; - } - typedef const_blas_data_mapper LhsMapper; typedef const_blas_data_mapper RhsMapper; - general_matrix_vector_product - ::run( - actualLhs.rows(), actualLhs.cols(), - LhsMapper(actualLhs.data(), actualLhs.outerStride()), - RhsMapper(actualRhs.data(), actualRhs.innerStride()), - actualDestPtr, 1, - compatibleAlpha); + RhsScalar compatibleAlpha = get_factor::run(actualAlpha); - if (!evalToDest) + if(!MightCannotUseDest) { - if(!alphaIsCompatible) - dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); - else - dest = MappedDest(actualDestPtr, dest.size()); + // shortcut if we are sure to be able to use dest directly, + // this ease the compiler to generate cleaner and more optimzized code for most common cases + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + dest.data(), 1, + compatibleAlpha); + } + else + { + gemv_static_vector_if static_dest; + + const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + evalToDest ? dest.data() : static_dest.data()); + + if(!evalToDest) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + Index size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if(!alphaIsCompatible) + { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } + else + MappedDest(actualDestPtr, dest.size()) = dest; + } + + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + actualDestPtr, 1, + compatibleAlpha); + + if (!evalToDest) + { + if(!alphaIsCompatible) + dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } } } };