mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-13 01:43:13 +08:00
Updated the tensor type casting code as follows: in the case where TgtRatio < SrcRatio, disable the vectorization of the source expression unless it has direct access.
This commit is contained in:
parent
d9cb604a5d
commit
b0f2b6f297
@@ -93,7 +93,10 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
|
|||||||
template<int LoadMode, typename Index>
|
template<int LoadMode, typename Index>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
|
||||||
const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
||||||
if (index + SrcPacketSize < m_maxIndex) {
|
// Only call m_impl.packet() when we have direct access to the underlying data. This
|
||||||
|
// ensures that we don't compute the subexpression twice. We may however load some
|
||||||
|
// coefficients twice, but in practice this doesn't negatively impact performance.
|
||||||
|
if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
|
||||||
// Force unaligned memory loads since we can't ensure alignment anymore
|
// Force unaligned memory loads since we can't ensure alignment anymore
|
||||||
return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
|
return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user