mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Add PacketConv implementation for non-vectorizable src expressions
This commit is contained in:
parent
1e36166ed1
commit
21eb97d3e0
@ -230,8 +230,21 @@ struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize>
|
template <typename SrcPacket, typename TargetPacket, int LoadMode>
|
||||||
struct PacketConv<SrcPacket, TargetPacket, LoadMode, ActuallyVectorize, true> {
|
struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> {
|
||||||
|
typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
|
||||||
|
static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;
|
||||||
|
|
||||||
|
template <typename ArgType, typename Device>
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
|
||||||
|
EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
|
||||||
|
for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
|
||||||
|
return internal::pload<TargetPacket>(values);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename SrcPacket, typename TargetPacket, int LoadMode>
|
||||||
|
struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/true, /*IsSameT=*/true> {
|
||||||
template <typename ArgType, typename Device>
|
template <typename ArgType, typename Device>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
|
||||||
return impl.template packet<LoadMode>(index);
|
return impl.template packet<LoadMode>(index);
|
||||||
@ -287,10 +300,17 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<int LoadMode>
|
template<int LoadMode>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
|
||||||
{
|
packet(Index index) const {
|
||||||
const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
|
// If we are not going to do the cast, we just need to check that the base
|
||||||
internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
|
// TensorEvaluator has packet access. Otherwise we also need to make sure
|
||||||
|
// that we have an implementation of vectorized cast.
|
||||||
|
const bool Vectorizable =
|
||||||
|
IsSameType
|
||||||
|
? TensorEvaluator<ArgType, Device>::PacketAccess
|
||||||
|
: TensorEvaluator<ArgType, Device>::PacketAccess &
|
||||||
|
internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
|
||||||
|
|
||||||
return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode,
|
return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode,
|
||||||
Vectorizable, IsSameType>::run(m_impl, index);
|
Vectorizable, IsSameType>::run(m_impl, index);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user