Fix broadcasting oob error.

For vectorized 1-dimensional inputs that do not take the special blocking path (e.g. `std::complex<...>`), there was an index-out-of-bounds error causing the broadcast size to be computed incorrectly. Here we fix this, and make other minor cleanup changes. Fixes #2351. (cherry picked from commit a500da1dc089b08e2f2b3b05a2eb23194425460e)
2025-07-08 22:21:49 +08:00 · 2021-10-21 19:57:00 -07:00 · 2021-10-21 19:57:00 -07:00 · 0ab1f8ec03
commit 0ab1f8ec03
parent b0fe14213e
2 changed files with 46 additions and 31 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@ -127,7 +127,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
  typedef DSizes<Index, 2 * NumDims> BroadcastDimensions;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
- typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
@ -144,7 +144,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
  {

    // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
-    // and store the result in a scalar. Instead one should reshape the scalar into a a N-D
+    // and store the result in a scalar. Instead one should reshape the scalar into a N-D
    // tensor with N >= 1 of 1 element first and then broadcast.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
    const InputDimensions& input_dims = m_impl.dimensions();
@ -410,25 +410,24 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const
  {
+    // Consider the flattened tensor [v0, ..., vN],
+    // Concatenates m_broadcast[dim] copies,
+    //    [v0, ..., vN, v0, ..., vN, ... ]
+    // with dim == NumDims - 1 for col-major, dim == 0 for row-major.
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

-    Index dim, inputIndex;
-
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      dim = NumDims - 1;
-    } else {
-      dim = 0;
-    }
-
-    inputIndex = index % m_inputStrides[dim];
-    if (inputIndex + PacketSize <= m_inputStrides[dim]) {
+    // Size of flattened tensor.
+    const Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
+                      m_inputStrides[NumDims - 1] : m_inputStrides[0];
+    Index inputIndex = index % M;
+    if (inputIndex + PacketSize <= M) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
-        if (inputIndex > m_inputStrides[dim]-1) {
+        if (inputIndex > M - 1) {
          inputIndex = 0;
        }
        values[i] = m_impl.coeff(inputIndex++);
@ -440,32 +439,30 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const
  {
+    // Consider the flattened tensor [v0, ..., vN],
+    // Interleaves m_broadcast[dim] copies,
+    //    [v0, v0, ..., v1, v1, ..., vN, vN, ... ]
+    // with dim == 0 for col-major, dim == NumDims - 1 for row-major.
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
+    eigen_assert(index + PacketSize-1 < dimensions().TotalSize());

-    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
-    Index dim, inputIndex, outputOffset;
+    const Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
+                      m_broadcast[0] : m_broadcast[NumDims - 1];

-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      dim = 1;
-    } else {
-      dim = NumDims - 2;
-    }
-
-    inputIndex   = index / m_outputStrides[dim];
-    outputOffset = index % m_outputStrides[dim];
-    if (outputOffset + PacketSize <= m_outputStrides[dim]) {
-      values[0] = m_impl.coeff(inputIndex);
-      return internal::pload1<PacketReturnType>(values);
+    Index inputIndex   = index / M;
+    Index outputOffset = index % M;
+    if (outputOffset + PacketSize <= M) {
+      return internal::pset1<PacketReturnType>(m_impl.coeff(inputIndex));
    } else {
+      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
-      for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
-        if (outputOffset + cur < m_outputStrides[dim]) {
+      for (int i = 0; i < PacketSize; ++i) {
+        if (outputOffset < M) {
          values[i] = m_impl.coeff(inputIndex);
+          ++outputOffset;
        } else {
-          values[i] = m_impl.coeff(++inputIndex);
          outputOffset = 0;
-          cur = 0;
+          values[i] = m_impl.coeff(++inputIndex);
        }
      }
      return internal::pload<PacketReturnType>(values);
--- a/unsupported/test/cxx11_tensor_broadcasting.cpp
+++ b/unsupported/test/cxx11_tensor_broadcasting.cpp
@ -256,6 +256,22 @@ static void test_simple_broadcasting_n_by_one()
  }
 }

+template <int DataLayout>
+static void test_size_one_broadcasting()
+{
+  Tensor<float, 1, DataLayout> tensor(1);
+  tensor.setRandom();
+  array<ptrdiff_t, 1> broadcasts = {64};
+  Tensor<float, 1, DataLayout> broadcast;
+  broadcast = tensor.broadcast(broadcasts);
+
+  VERIFY_IS_EQUAL(broadcast.dimension(0), broadcasts[0]);
+
+  for (int i = 0; i < broadcasts[0]; ++i) {
+    VERIFY_IS_EQUAL(tensor(0), broadcast(i));
+  }
+}
+
 template <int DataLayout>
 static void test_simple_broadcasting_one_by_n_by_one_1d()
 {
@ -328,4 +344,6 @@ EIGEN_DECLARE_TEST(cxx11_tensor_broadcasting)
  CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<ColMajor>());
  CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_1d<RowMajor>());
  CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<RowMajor>());
+  CALL_SUBTEST(test_size_one_broadcasting<ColMajor>());
+  CALL_SUBTEST(test_size_one_broadcasting<RowMajor>());
 }