mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
Fixed a bug in the dispatch of optimized reduction kernels.
This commit is contained in:
parent
780623261e
commit
2ccb1c8634
@@ -438,19 +438,18 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
|||||||
EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
|
EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
|
||||||
YOU_MADE_A_PROGRAMMING_MISTAKE);
|
YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||||
|
|
||||||
// Bitmap indicating if an input dimension is reduced or not.
|
// Build the bitmap indicating if an input dimension is reduced or not.
|
||||||
array<bool, NumInputDims> reduced;
|
|
||||||
for (int i = 0; i < NumInputDims; ++i) {
|
for (int i = 0; i < NumInputDims; ++i) {
|
||||||
reduced[i] = false;
|
m_reduced[i] = false;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < NumReducedDims; ++i) {
|
for (int i = 0; i < NumReducedDims; ++i) {
|
||||||
eigen_assert(op.dims()[i] >= 0);
|
eigen_assert(op.dims()[i] >= 0);
|
||||||
eigen_assert(op.dims()[i] < NumInputDims);
|
eigen_assert(op.dims()[i] < NumInputDims);
|
||||||
reduced[op.dims()[i]] = true;
|
m_reduced[op.dims()[i]] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
||||||
internal::DimInitializer<Dimensions>::run(input_dims, reduced, &m_dimensions, &m_reducedDims);
|
internal::DimInitializer<Dimensions>::run(input_dims, m_reduced, &m_dimensions, &m_reducedDims);
|
||||||
|
|
||||||
// Precompute output strides.
|
// Precompute output strides.
|
||||||
if (NumOutputDims > 0) {
|
if (NumOutputDims > 0) {
|
||||||
@@ -485,7 +484,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
|||||||
int outputIndex = 0;
|
int outputIndex = 0;
|
||||||
int reduceIndex = 0;
|
int reduceIndex = 0;
|
||||||
for (int i = 0; i < NumInputDims; ++i) {
|
for (int i = 0; i < NumInputDims; ++i) {
|
||||||
if (reduced[i]) {
|
if (m_reduced[i]) {
|
||||||
m_reducedStrides[reduceIndex] = input_strides[i];
|
m_reducedStrides[reduceIndex] = input_strides[i];
|
||||||
++reduceIndex;
|
++reduceIndex;
|
||||||
} else {
|
} else {
|
||||||
@@ -531,9 +530,9 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
|||||||
bool reducing_inner_dims = true;
|
bool reducing_inner_dims = true;
|
||||||
for (int i = 0; i < NumReducedDims; ++i) {
|
for (int i = 0; i < NumReducedDims; ++i) {
|
||||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
reducing_inner_dims &= m_reducedDims[i];
|
reducing_inner_dims &= m_reduced[i];
|
||||||
} else {
|
} else {
|
||||||
reducing_inner_dims &= m_reducedDims[NumInputDims - 1 - i];
|
reducing_inner_dims &= m_reduced[NumInputDims - 1 - i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (internal::InnerReducer<Self, Op, Device>::HasOptimizedImplementation &&
|
if (internal::InnerReducer<Self, Op, Device>::HasOptimizedImplementation &&
|
||||||
@@ -548,9 +547,9 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
|||||||
bool preserving_inner_dims = true;
|
bool preserving_inner_dims = true;
|
||||||
for (int i = 0; i < NumReducedDims; ++i) {
|
for (int i = 0; i < NumReducedDims; ++i) {
|
||||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
preserving_inner_dims &= m_reducedDims[NumInputDims - 1 - i];
|
preserving_inner_dims &= m_reduced[NumInputDims - 1 - i];
|
||||||
} else {
|
} else {
|
||||||
preserving_inner_dims &= m_reducedDims[i];
|
preserving_inner_dims &= m_reduced[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (internal::OuterReducer<Self, Op, Device>::HasOptimizedImplementation &&
|
if (internal::OuterReducer<Self, Op, Device>::HasOptimizedImplementation &&
|
||||||
@@ -689,6 +688,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
|||||||
return startInput;
|
return startInput;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bitmap indicating if an input dimension is reduced or not.
|
||||||
|
array<bool, NumInputDims> m_reduced;
|
||||||
// Dimensions of the output of the operation.
|
// Dimensions of the output of the operation.
|
||||||
Dimensions m_dimensions;
|
Dimensions m_dimensions;
|
||||||
// Precomputed strides for the output tensor.
|
// Precomputed strides for the output tensor.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user