Ease compiler job to generate clean and efficient code in mat*vec.

This commit is contained in:
Gael Guennebaud 2016-12-02 22:41:26 +01:00
parent fe696022ec
commit 66f65ccc36

View File

@ -224,16 +224,32 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
// on, the other hand it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
};
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
if(!MightCannotUseDest)
{
// shortcut if we are sure to be able to use dest directly,
// this ease the compiler to generate cleaner and more optimzized code for most common cases
general_matrix_vector_product
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
dest.data(), 1,
compatibleAlpha);
}
else
{
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
evalToDest ? dest.data() : static_dest.data());
@ -252,8 +268,6 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
MappedDest(actualDestPtr, dest.size()) = dest;
}
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
general_matrix_vector_product
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
actualLhs.rows(), actualLhs.cols(),
@ -270,6 +284,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
dest = MappedDest(actualDestPtr, dest.size());
}
}
}
};
template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>