mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-14 04:35:57 +08:00
Preserve the ability to vectorize the evaluation of an expression even when it involves a cast that isn't vectorized (e.g fp16 to float)
This commit is contained in:
parent
36369ab63c
commit
1a47844529
@ -193,7 +193,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
|
||||
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess && internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
|
||||
PacketAccess = true,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
RawAccess = false
|
||||
};
|
||||
@ -224,11 +224,9 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
|
||||
const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
|
||||
PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
|
||||
SrcCoeffRatio, TgtCoeffRatio> converter(m_impl);
|
||||
return converter.template packet<LoadMode>(index);
|
||||
const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
|
||||
internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
|
||||
return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
|
||||
@ -249,7 +247,31 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
|
||||
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
|
||||
|
||||
protected:
|
||||
TensorEvaluator<ArgType, Device> m_impl;
|
||||
template <int LoadMode, bool ActuallyVectorize>
|
||||
struct PacketConv {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
|
||||
internal::scalar_cast_op<SrcType, TargetType> converter;
|
||||
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
|
||||
for (int i = 0; i < PacketSize; ++i) {
|
||||
values[i] = converter(impl.coeff(index+i));
|
||||
}
|
||||
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
|
||||
return rslt;
|
||||
}
|
||||
};
|
||||
|
||||
template <int LoadMode>
|
||||
struct PacketConv<LoadMode, true> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
|
||||
const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
|
||||
const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
|
||||
PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
|
||||
SrcCoeffRatio, TgtCoeffRatio> converter(impl);
|
||||
return converter.template packet<LoadMode>(index);
|
||||
}
|
||||
};
|
||||
|
||||
TensorEvaluator<ArgType, Device> m_impl;
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
Loading…
x
Reference in New Issue
Block a user