Preserve the ability to vectorize the evaluation of an expression even when it involves a cast that isn't vectorized (e.g., fp16 to float).

This commit is contained in:
Benoit Steiner 2016-05-26 14:37:09 -07:00
parent 36369ab63c
commit 1a47844529

View File

@ -193,7 +193,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
enum { enum {
IsAligned = false, IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess && internal::type_casting_traits<SrcType, TargetType>::VectorizedCast, PacketAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout, Layout = TensorEvaluator<ArgType, Device>::Layout,
RawAccess = false RawAccess = false
}; };
@ -224,11 +224,9 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
template<int LoadMode> template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{ {
const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType, return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
SrcCoeffRatio, TgtCoeffRatio> converter(m_impl);
return converter.template packet<LoadMode>(index);
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
@ -249,6 +247,30 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
protected: protected:
// Generic (non-specialized) packet conversion: used for every
// ActuallyVectorize value that the <LoadMode, true> specialization does not
// match. Each coefficient is read from the wrapped evaluator and converted
// individually, and the results are then loaded as a single packet.
// LoadMode is not consulted on this path.
template <int LoadMode, bool ActuallyVectorize>
struct PacketConv {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
    typedef typename internal::remove_const<CoeffReturnType>::type MutableCoeff;

    // Scratch storage for one packet's worth of converted coefficients,
    // declared with EIGEN_ALIGN_MAX before being handed to pload.
    EIGEN_ALIGN_MAX MutableCoeff lanes[PacketSize];

    // Element-wise SrcType -> TargetType conversion functor.
    internal::scalar_cast_op<SrcType, TargetType> castOne;
    for (int lane = 0; lane < PacketSize; ++lane) {
      lanes[lane] = castOne(impl.coeff(index + lane));
    }

    return internal::pload<PacketReturnType>(lanes);
  }
};
// Specialization selected when the second template argument is true:
// delegates the whole conversion to PacketConverter, parameterized by the
// source/target coefficient ratios published by type_casting_traits, and
// forwards LoadMode to its packet() call.
template <int LoadMode>
struct PacketConv<LoadMode, true> {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
    typedef internal::type_casting_traits<SrcType, TargetType> CastTraits;
    const int srcRatio = CastTraits::SrcCoeffRatio;
    const int tgtRatio = CastTraits::TgtCoeffRatio;

    typedef PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
                            srcRatio, tgtRatio> Converter;

    Converter packetCaster(impl);
    return packetCaster.template packet<LoadMode>(index);
  }
};
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
}; };