Remove dead code from TensorReduction.h
This commit is contained in:
parent b5df8cabd7
commit 3fda850c46
@@ -452,70 +452,6 @@ struct ReductionReturnType {
#endif
};

template <typename Self, typename Op,
          bool Vectorizable =
              (Self::InputPacketAccess & Self::ReducerTraits::PacketAccess)>
class BlockReducer {
 public:
  typedef typename Self::Index Index;
  typedef typename Self::Scalar Scalar;
  typedef typename Self::CoeffReturnType CoeffReturnType;
  typedef typename Self::PacketReturnType PacketReturnType;
  explicit BlockReducer(const Op& reducer) : op_(reducer) {
    accum_ = op_.initialize();
  }
  void Reduce(Index index, Index num_values_to_reduce, Scalar* data) {
    for (Index i = 0; i < num_values_to_reduce; ++i) {
      op_.reduce(data[index + i], &accum_);
    }
  }
  CoeffReturnType Finalize() { return op_.finalize(accum_); }
  PacketReturnType FinalizePacket() {
    // TODO(andydavis) This function should not be called for Scalar
    // reductions: clean this up or add an assert here.
    return PacketReturnType();
  }

 private:
  CoeffReturnType accum_;
  Op op_;
};
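
The scalar path above simply folds a contiguous strip of coefficients into one accumulator through the reducer functor. Below is a minimal standalone sketch of that usage pattern; SumOp and ScalarBlockReducer are hypothetical stand-ins for illustration, not Eigen's actual reducer interface.

#include <cstddef>
#include <iostream>

struct SumOp {
  float initialize() const { return 0.0f; }            // identity element
  void reduce(float v, float* accum) const { *accum += v; }
  float finalize(float accum) const { return accum; }
};

template <typename Op>
class ScalarBlockReducer {
 public:
  explicit ScalarBlockReducer(const Op& reducer) : op_(reducer) {
    accum_ = op_.initialize();
  }
  void Reduce(std::size_t index, std::size_t n, const float* data) {
    for (std::size_t i = 0; i < n; ++i) op_.reduce(data[index + i], &accum_);
  }
  float Finalize() const { return op_.finalize(accum_); }

 private:
  float accum_;
  Op op_;
};

int main() {
  const float data[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  ScalarBlockReducer<SumOp> reducer{SumOp{}};
  reducer.Reduce(0, 8, data);              // reduce one contiguous block
  std::cout << reducer.Finalize() << "\n"; // prints 36
}

The removed vectorized specialization splits the same strip into a packet-sized main loop plus a scalar tail: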

template <typename Self, typename Op>
class BlockReducer<Self, Op, true> {
 public:
  typedef typename Self::Index Index;
  typedef typename Self::Scalar Scalar;
  typedef typename Self::CoeffReturnType CoeffReturnType;
  typedef typename Self::PacketReturnType PacketReturnType;
  static const Index PacketSize =
      internal::unpacket_traits<PacketReturnType>::size;

  explicit BlockReducer(const Op& reducer) : op_(reducer) {
    vaccum_ = op_.template initializePacket<PacketReturnType>();
    accum_ = op_.initialize();
  }
  void Reduce(Index index, Index num_values_to_reduce, Scalar* data) {
    const Index vectorized_size =
        (num_values_to_reduce / PacketSize) * PacketSize;
    for (Index i = 0; i < vectorized_size; i += PacketSize) {
      op_.reducePacket(
          internal::ploadt<PacketReturnType, Unaligned>(&data[index + i]),
          &vaccum_);
    }
    for (Index i = vectorized_size; i < num_values_to_reduce; ++i) {
      op_.reduce(data[index + i], &accum_);
    }
  }
  CoeffReturnType Finalize() { return op_.finalizeBoth(accum_, vaccum_); }
  PacketReturnType FinalizePacket() { return op_.finalizePacket(vaccum_); }

 private:
  PacketReturnType vaccum_;
  CoeffReturnType accum_;
  Op op_;
};

} // end namespace internal
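
The key detail in the specialization is that Reduce() keeps two accumulators: a packet accumulator vaccum_ fed by reducePacket over packet-aligned chunks, and a scalar accumulator accum_ for the leftover tail, merged at the end by finalizeBoth. A standalone sketch of that split, using a plain 4-float array as a stand-in for a SIMD packet (illustrative only, not Eigen's packet API):

#include <cstddef>
#include <iostream>

int main() {
  const std::size_t kPacketSize = 4;  // e.g. four floats per SSE register
  const float data[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  const std::size_t n = 10;

  // Largest multiple of the packet size that fits: (10 / 4) * 4 == 8.
  const std::size_t vectorized_size = (n / kPacketSize) * kPacketSize;

  // Packet accumulator: one partial sum per lane (plays the role of vaccum_).
  float vaccum[4] = {0, 0, 0, 0};
  for (std::size_t i = 0; i < vectorized_size; i += kPacketSize) {
    for (std::size_t lane = 0; lane < kPacketSize; ++lane) {
      vaccum[lane] += data[i + lane];  // stands in for op_.reducePacket(...)
    }
  }

  // Scalar accumulator for the remainder (plays the role of accum_).
  float accum = 0.0f;
  for (std::size_t i = vectorized_size; i < n; ++i) {
    accum += data[i];  // stands in for op_.reduce(...)
  }

  // finalizeBoth: fold the packet lanes into the scalar result.
  for (std::size_t lane = 0; lane < kPacketSize; ++lane) accum += vaccum[lane];
  std::cout << accum << "\n";  // prints 55
}

The second hunk removes two helpers from TensorReductionEvaluatorBase: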

@@ -993,73 +929,6 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
    return startInput;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void GetInputCoordsForOutputIndex(
      Index index,
      DSizes<Index, NumInputDims>* coords) const {
    for (int i = 0; i < NumInputDims; ++i) {
      (*coords)[i] = 0;
    }
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumOutputDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        (*coords)[m_output_to_input_dim_map[i]] = idx;
        index -= idx * m_outputStrides[i];
      }
      (*coords)[m_output_to_input_dim_map[0]] = index;
    } else {
      for (int i = 0; i < NumOutputDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        (*coords)[m_output_to_input_dim_map[i]] = idx;
        index -= idx * m_outputStrides[i];
      }
      (*coords)[m_output_to_input_dim_map[NumOutputDims - 1]] = index;
    }
  }
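
GetInputCoordsForOutputIndex recovers an output index's multi-dimensional coordinates by repeatedly dividing by the output strides, then maps each output dimension back to its input dimension. A self-contained sketch of the stride-division step for a fixed column-major shape (the 2x3x4 shape, strides, and index are illustrative, not taken from Eigen):

#include <iostream>

int main() {
  // Column-major shape {2, 3, 4}: strides[i] = product of the dims before i.
  const int strides[3] = {1, 2, 6};

  int index = 17;  // flat output index to decompose
  int coords[3];
  // Peel off the outermost coordinate first, as the ColMajor branch does.
  for (int i = 2; i > 0; --i) {
    coords[i] = index / strides[i];   // how many full outer slices fit
    index -= coords[i] * strides[i];  // remainder indexes the inner dims
  }
  coords[0] = index;

  // 17 == 1*1 + 2*2 + 2*6, so this prints "1 2 2".
  std::cout << coords[0] << " " << coords[1] << " " << coords[2] << "\n";
}

The second removed helper, CalculateTargetInputBlockShape, follows: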

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void CalculateTargetInputBlockShape(
      const Index max_coeff_count,
      const DSizes<Index, NumInputDims>& input_slice_sizes,
      DSizes<Index, NumInputDims>* target_input_block_sizes) const {
    typedef internal::BlockReducer<Self, Op> BlockReducer;
    // TODO(andydavis) Compute reducer overhead correctly for the case where
    // we are preserving the inner most dimension, and a single reducer
    // reduces a packet's worth of output coefficients.
    const Index reducer_overhead = sizeof(BlockReducer) / sizeof(Scalar);

    Index coeff_to_allocate = max_coeff_count;
    bool first_preserved_dim_allocated = false;
    bool first_reduced_dim_allocated = false;
    for (int i = 0; i < NumInputDims; ++i) {
      const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
                          ? i
                          : NumInputDims - i - 1;
      (*target_input_block_sizes)[dim] = 1;
      if (m_reduced[dim]) {
        // TODO(andydavis) Consider allocating to multiple reduced dimensions.
        // Watch out for cases where reduced dimensions are not contiguous,
        // which induces scattered reads.
        if (!first_reduced_dim_allocated) {
          (*target_input_block_sizes)[dim] =
              numext::mini(input_slice_sizes[dim], coeff_to_allocate);
          coeff_to_allocate /= (*target_input_block_sizes)[dim];
          first_reduced_dim_allocated = true;
        }
      } else if (!first_preserved_dim_allocated) {
        // TODO(andydavis) Include output block size in this L1 working set
        // calculation.
        const Index alloc_size = numext::maxi(
            static_cast<Index>(1), coeff_to_allocate / reducer_overhead);
        (*target_input_block_sizes)[dim] =
            numext::mini(input_slice_sizes[dim], alloc_size);
        coeff_to_allocate = numext::maxi(
            static_cast<Index>(1),
            coeff_to_allocate /
                ((*target_input_block_sizes)[dim] * reducer_overhead));
        first_preserved_dim_allocated = true;
      }
    }
  }

  // Bitmap indicating if an input dimension is reduced or not.
  array<bool, NumInputDims> m_reduced;
  // Dimensions of the output of the operation.
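
For a concrete feel for the CalculateTargetInputBlockShape heuristic above: the first reduced dimension greedily takes the coefficient budget, and the first preserved dimension is then charged reducer_overhead per coefficient for the reducer state it carries. A toy walkthrough with illustrative shapes and numbers only:

#include <algorithm>
#include <iostream>

int main() {
  // Column-major input of shape {64, 128}; dim 0 is reduced, dim 1 preserved.
  const long slice_sizes[2] = {64, 128};
  const bool reduced[2] = {true, false};
  const long reducer_overhead = 2;  // sizeof(BlockReducer) / sizeof(Scalar)

  long coeff_to_allocate = 256;  // max_coeff_count budget
  long block_sizes[2] = {1, 1};
  bool got_reduced = false, got_preserved = false;
  for (int dim = 0; dim < 2; ++dim) {  // ColMajor: scan inner to outer
    if (reduced[dim] && !got_reduced) {
      // Give the first reduced dimension as much of the budget as it can use.
      block_sizes[dim] = std::min(slice_sizes[dim], coeff_to_allocate);
      coeff_to_allocate /= block_sizes[dim];  // 256 / 64 leaves 4
      got_reduced = true;
    } else if (!reduced[dim] && !got_preserved) {
      // Charge each preserved coefficient for its reducer state.
      const long alloc = std::max(1L, coeff_to_allocate / reducer_overhead);
      block_sizes[dim] = std::min(slice_sizes[dim], alloc);  // min(128, 2)
      got_preserved = true;
    }
  }
  std::cout << block_sizes[0] << " x " << block_sizes[1] << "\n";  // 64 x 2
}

Charging preserved coefficients for reducer state keeps the per-block working set (input coefficients plus one BlockReducer per preserved output) near the intended L1 budget, which is what the division by reducer_overhead approximates.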