From 0ab1f8ec03a920b01d19061ca97eaf05b81f9fb1 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Thu, 21 Oct 2021 19:57:00 -0700 Subject: [PATCH] Fix broadcasting oob error. For vectorized 1-dimensional inputs that do not take the special blocking path (e.g. `std::complex<...>`), there was an index-out-of-bounds error causing the broadcast size to be computed incorrectly. Here we fix this, and make other minor cleanup changes. Fixes #2351. (cherry picked from commit a500da1dc089b08e2f2b3b05a2eb23194425460e) --- .../CXX11/src/Tensor/TensorBroadcasting.h | 59 +++++++++---------- .../test/cxx11_tensor_broadcasting.cpp | 18 ++++++ 2 files changed, 46 insertions(+), 31 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index a354132f6..8d8ad2658 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -127,7 +127,7 @@ struct TensorEvaluator, Device> typedef DSizes BroadcastDimensions; //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// - typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockDescriptor TensorBlockDesc; typedef internal::TensorBlockScratchAllocator TensorBlockScratch; typedef typename TensorEvaluator::TensorBlock @@ -144,7 +144,7 @@ struct TensorEvaluator, Device> { // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar - // and store the result in a scalar. Instead one should reshape the scalar into a a N-D + // and store the result in a scalar. Instead one should reshape the scalar into a N-D // tensor with N >= 1 of 1 element first and then broadcast. EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); const InputDimensions& input_dims = m_impl.dimensions(); @@ -410,25 +410,24 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const { + // Consider the flattened tensor [v0, ..., vN], + // Concatenates m_broadcast[dim] copies, + // [v0, ..., vN, v0, ..., vN, ... ] + // with dim == NumDims - 1 for col-major, dim == 0 for row-major. EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - Index dim, inputIndex; - - if (static_cast(Layout) == static_cast(ColMajor)) { - dim = NumDims - 1; - } else { - dim = 0; - } - - inputIndex = index % m_inputStrides[dim]; - if (inputIndex + PacketSize <= m_inputStrides[dim]) { + // Size of flattened tensor. + const Index M = (static_cast(Layout) == static_cast(ColMajor)) ? + m_inputStrides[NumDims - 1] : m_inputStrides[0]; + Index inputIndex = index % M; + if (inputIndex + PacketSize <= M) { return m_impl.template packet(inputIndex); } else { EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; EIGEN_UNROLL_LOOP for (int i = 0; i < PacketSize; ++i) { - if (inputIndex > m_inputStrides[dim]-1) { + if (inputIndex > M - 1) { inputIndex = 0; } values[i] = m_impl.coeff(inputIndex++); @@ -440,32 +439,30 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const { + // Consider the flattened tensor [v0, ..., vN], + // Interleaves m_broadcast[dim] copies, + // [v0, v0, ..., v1, v1, ..., vN, vN, ... ] + // with dim == 0 for col-major, dim == NumDims - 1 for row-major. EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + eigen_assert(index + PacketSize-1 < dimensions().TotalSize()); - EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; - Index dim, inputIndex, outputOffset; + const Index M = (static_cast(Layout) == static_cast(ColMajor)) ? + m_broadcast[0] : m_broadcast[NumDims - 1]; - if (static_cast(Layout) == static_cast(ColMajor)) { - dim = 1; - } else { - dim = NumDims - 2; - } - - inputIndex = index / m_outputStrides[dim]; - outputOffset = index % m_outputStrides[dim]; - if (outputOffset + PacketSize <= m_outputStrides[dim]) { - values[0] = m_impl.coeff(inputIndex); - return internal::pload1(values); + Index inputIndex = index / M; + Index outputOffset = index % M; + if (outputOffset + PacketSize <= M) { + return internal::pset1(m_impl.coeff(inputIndex)); } else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; EIGEN_UNROLL_LOOP - for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) { - if (outputOffset + cur < m_outputStrides[dim]) { + for (int i = 0; i < PacketSize; ++i) { + if (outputOffset < M) { values[i] = m_impl.coeff(inputIndex); + ++outputOffset; } else { - values[i] = m_impl.coeff(++inputIndex); outputOffset = 0; - cur = 0; + values[i] = m_impl.coeff(++inputIndex); } } return internal::pload(values); diff --git a/unsupported/test/cxx11_tensor_broadcasting.cpp b/unsupported/test/cxx11_tensor_broadcasting.cpp index d3dab891f..cbd92c328 100644 --- a/unsupported/test/cxx11_tensor_broadcasting.cpp +++ b/unsupported/test/cxx11_tensor_broadcasting.cpp @@ -256,6 +256,22 @@ static void test_simple_broadcasting_n_by_one() } } +template +static void test_size_one_broadcasting() +{ + Tensor tensor(1); + tensor.setRandom(); + array broadcasts = {64}; + Tensor broadcast; + broadcast = tensor.broadcast(broadcasts); + + VERIFY_IS_EQUAL(broadcast.dimension(0), broadcasts[0]); + + for (int i = 0; i < broadcasts[0]; ++i) { + VERIFY_IS_EQUAL(tensor(0), broadcast(i)); + } +} + template static void test_simple_broadcasting_one_by_n_by_one_1d() { @@ -328,4 +344,6 @@ EIGEN_DECLARE_TEST(cxx11_tensor_broadcasting) CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d()); CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_1d()); CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d()); + CALL_SUBTEST(test_size_one_broadcasting()); + CALL_SUBTEST(test_size_one_broadcasting()); }