mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-30 07:44:10 +08:00
Ease compiler job to generate clean and efficient code in mat*vec.
This commit is contained in:
parent
fe696022ec
commit
66f65ccc36
@ -224,50 +224,65 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
|||||||
// on, the other hand it is good for the cache to pack the vector anyways...
|
// on, the other hand it is good for the cache to pack the vector anyways...
|
||||||
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
|
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
|
||||||
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
||||||
MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
|
MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
|
||||||
};
|
};
|
||||||
|
|
||||||
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
|
||||||
|
|
||||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
|
||||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
|
||||||
|
|
||||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
|
||||||
|
|
||||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
|
||||||
evalToDest ? dest.data() : static_dest.data());
|
|
||||||
|
|
||||||
if(!evalToDest)
|
|
||||||
{
|
|
||||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
|
||||||
Index size = dest.size();
|
|
||||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
|
||||||
#endif
|
|
||||||
if(!alphaIsCompatible)
|
|
||||||
{
|
|
||||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
|
||||||
compatibleAlpha = RhsScalar(1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
|
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
|
||||||
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
|
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
|
||||||
general_matrix_vector_product
|
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
|
||||||
actualLhs.rows(), actualLhs.cols(),
|
|
||||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
|
||||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
|
||||||
actualDestPtr, 1,
|
|
||||||
compatibleAlpha);
|
|
||||||
|
|
||||||
if (!evalToDest)
|
if(!MightCannotUseDest)
|
||||||
{
|
{
|
||||||
if(!alphaIsCompatible)
|
// shortcut if we are sure to be able to use dest directly,
|
||||||
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
// this ease the compiler to generate cleaner and more optimzized code for most common cases
|
||||||
else
|
general_matrix_vector_product
|
||||||
dest = MappedDest(actualDestPtr, dest.size());
|
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||||
|
actualLhs.rows(), actualLhs.cols(),
|
||||||
|
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||||
|
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||||
|
dest.data(), 1,
|
||||||
|
compatibleAlpha);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||||
|
|
||||||
|
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||||
|
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||||
|
|
||||||
|
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||||
|
evalToDest ? dest.data() : static_dest.data());
|
||||||
|
|
||||||
|
if(!evalToDest)
|
||||||
|
{
|
||||||
|
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||||
|
Index size = dest.size();
|
||||||
|
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||||
|
#endif
|
||||||
|
if(!alphaIsCompatible)
|
||||||
|
{
|
||||||
|
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||||
|
compatibleAlpha = RhsScalar(1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
general_matrix_vector_product
|
||||||
|
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||||
|
actualLhs.rows(), actualLhs.cols(),
|
||||||
|
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||||
|
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||||
|
actualDestPtr, 1,
|
||||||
|
compatibleAlpha);
|
||||||
|
|
||||||
|
if (!evalToDest)
|
||||||
|
{
|
||||||
|
if(!alphaIsCompatible)
|
||||||
|
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||||
|
else
|
||||||
|
dest = MappedDest(actualDestPtr, dest.size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user