Updated the tensor type casting code as follow: in the case where TgtRatio < SrcRatio, disable the vectorization of the source expression unless is has direct-access.

2025-09-13 01:43:13 +08:00 · 2015-03-02 10:11:40 -08:00 · 2015-03-02 10:11:40 -08:00 · b0f2b6f297
commit b0f2b6f297
parent d9cb604a5d
1 changed files with 4 additions and 1 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@ -93,7 +93,10 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
-    if (index + SrcPacketSize < m_maxIndex) {
+    // Only call m_impl.packet() when we have direct access to the underlying data. This
    // ensures that we don't compute the subexpression twice. We may however load some
    // coefficients twice, but in practice this doesn't negatively impact performance.
    if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
      // Force unaligned memory loads since we can't ensure alignment anymore
      return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
    } else {