Fix broadcasting out-of-bounds (OOB) error.

For vectorized 1-dimensional inputs that do not take the special
blocking path (e.g. `std::complex<...>`), there was an
index-out-of-bounds error causing the broadcast size to be
computed incorrectly.  Here we fix this, and make other minor
cleanup changes.

Fixes #2351.


(cherry picked from commit a500da1dc089b08e2f2b3b05a2eb23194425460e)
This commit is contained in:
Antonio Sanchez 2021-10-21 19:57:00 -07:00 committed by Rasmus Munk Larsen
parent b0fe14213e
commit 0ab1f8ec03
2 changed files with 46 additions and 31 deletions

View File

@ -127,7 +127,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
typedef DSizes<Index, 2 * NumDims> BroadcastDimensions;
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
@ -144,7 +144,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
{
// The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
// and store the result in a scalar. Instead one should reshape the scalar into a a N-D
// and store the result in a scalar. Instead one should reshape the scalar into a N-D
// tensor with N >= 1 of 1 element first and then broadcast.
EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
const InputDimensions& input_dims = m_impl.dimensions();
@ -410,25 +410,24 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const
{
// Consider the flattened tensor [v0, ..., vN],
// Concatenates m_broadcast[dim] copies,
// [v0, ..., vN, v0, ..., vN, ... ]
// with dim == NumDims - 1 for col-major, dim == 0 for row-major.
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
Index dim, inputIndex;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
dim = NumDims - 1;
} else {
dim = 0;
}
inputIndex = index % m_inputStrides[dim];
if (inputIndex + PacketSize <= m_inputStrides[dim]) {
// Size of flattened tensor.
const Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
m_inputStrides[NumDims - 1] : m_inputStrides[0];
Index inputIndex = index % M;
if (inputIndex + PacketSize <= M) {
return m_impl.template packet<Unaligned>(inputIndex);
} else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
if (inputIndex > m_inputStrides[dim]-1) {
if (inputIndex > M - 1) {
inputIndex = 0;
}
values[i] = m_impl.coeff(inputIndex++);
@ -440,32 +439,30 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const
{
// Consider the flattened tensor [v0, ..., vN],
// Interleaves m_broadcast[dim] copies,
// [v0, v0, ..., v1, v1, ..., vN, vN, ... ]
// with dim == 0 for col-major, dim == NumDims - 1 for row-major.
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
eigen_assert(index + PacketSize-1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
Index dim, inputIndex, outputOffset;
const Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
m_broadcast[0] : m_broadcast[NumDims - 1];
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
dim = 1;
} else {
dim = NumDims - 2;
}
inputIndex = index / m_outputStrides[dim];
outputOffset = index % m_outputStrides[dim];
if (outputOffset + PacketSize <= m_outputStrides[dim]) {
values[0] = m_impl.coeff(inputIndex);
return internal::pload1<PacketReturnType>(values);
Index inputIndex = index / M;
Index outputOffset = index % M;
if (outputOffset + PacketSize <= M) {
return internal::pset1<PacketReturnType>(m_impl.coeff(inputIndex));
} else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
if (outputOffset + cur < m_outputStrides[dim]) {
for (int i = 0; i < PacketSize; ++i) {
if (outputOffset < M) {
values[i] = m_impl.coeff(inputIndex);
++outputOffset;
} else {
values[i] = m_impl.coeff(++inputIndex);
outputOffset = 0;
cur = 0;
values[i] = m_impl.coeff(++inputIndex);
}
}
return internal::pload<PacketReturnType>(values);

View File

@ -256,6 +256,22 @@ static void test_simple_broadcasting_n_by_one()
}
}
template <int DataLayout>
static void test_size_one_broadcasting()
{
Tensor<float, 1, DataLayout> tensor(1);
tensor.setRandom();
array<ptrdiff_t, 1> broadcasts = {64};
Tensor<float, 1, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), broadcasts[0]);
for (int i = 0; i < broadcasts[0]; ++i) {
VERIFY_IS_EQUAL(tensor(0), broadcast(i));
}
}
template <int DataLayout>
static void test_simple_broadcasting_one_by_n_by_one_1d()
{
@ -328,4 +344,6 @@ EIGEN_DECLARE_TEST(cxx11_tensor_broadcasting)
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<ColMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_1d<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<RowMajor>());
CALL_SUBTEST(test_size_one_broadcasting<ColMajor>());
CALL_SUBTEST(test_size_one_broadcasting<RowMajor>());
}