Remove dead code from TensorReduction.h

Eugene Zhulenev 2020-01-29 18:45:31 +00:00
parent b5df8cabd7
commit 3fda850c46


@@ -452,70 +452,6 @@ struct ReductionReturnType {
#endif
};
template <typename Self, typename Op,
          bool Vectorizable =
              (Self::InputPacketAccess & Self::ReducerTraits::PacketAccess)>
class BlockReducer {
 public:
  typedef typename Self::Index Index;
  typedef typename Self::Scalar Scalar;
  typedef typename Self::CoeffReturnType CoeffReturnType;
  typedef typename Self::PacketReturnType PacketReturnType;
  explicit BlockReducer(const Op& reducer) : op_(reducer) {
    accum_ = op_.initialize();
  }
  void Reduce(Index index, Index num_values_to_reduce, Scalar* data) {
    for (Index i = 0; i < num_values_to_reduce; ++i) {
      op_.reduce(data[index + i], &accum_);
    }
  }
  CoeffReturnType Finalize() { return op_.finalize(accum_); }
  PacketReturnType FinalizePacket() {
    // TODO(andydavis) This function should not be called for Scalar
    // reductions: clean this up or add an assert here.
    return PacketReturnType();
  }

 private:
  CoeffReturnType accum_;
  Op op_;
};

template <typename Self, typename Op>
class BlockReducer<Self, Op, true> {
 public:
  typedef typename Self::Index Index;
  typedef typename Self::Scalar Scalar;
  typedef typename Self::CoeffReturnType CoeffReturnType;
  typedef typename Self::PacketReturnType PacketReturnType;
  static const Index PacketSize =
      internal::unpacket_traits<PacketReturnType>::size;

  explicit BlockReducer(const Op& reducer) : op_(reducer) {
    vaccum_ = op_.template initializePacket<PacketReturnType>();
    accum_ = op_.initialize();
  }
  void Reduce(Index index, Index num_values_to_reduce, Scalar* data) {
    const Index vectorized_size =
        (num_values_to_reduce / PacketSize) * PacketSize;
    for (Index i = 0; i < vectorized_size; i += PacketSize) {
      op_.reducePacket(
          internal::ploadt<PacketReturnType, Unaligned>(&data[index + i]),
          &vaccum_);
    }
    for (Index i = vectorized_size; i < num_values_to_reduce; ++i) {
      op_.reduce(data[index + i], &accum_);
    }
  }
  CoeffReturnType Finalize() { return op_.finalizeBoth(accum_, vaccum_); }
  PacketReturnType FinalizePacket() { return op_.finalizePacket(vaccum_); }

 private:
  PacketReturnType vaccum_;
  CoeffReturnType accum_;
  Op op_;
};
} // end namespace internal
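
For context, the removed BlockReducer accumulated a contiguous run of coefficients through Eigen's reducer-functor interface: initialize/reduce/finalize on the scalar path, plus initializePacket/reducePacket/finalizePacket/finalizeBoth on the vectorized path. Below is a minimal standalone sketch of the scalar contract, using a hypothetical SumOp and SimpleBlockReducer in place of Eigen's real reducer and the deleted class:

#include <cstddef>
#include <iostream>

// Hypothetical stand-in for an Eigen reducer functor (scalar path only).
struct SumOp {
  float initialize() const { return 0.0f; }
  void reduce(float v, float* accum) const { *accum += v; }
  float finalize(float accum) const { return accum; }
};

// Mirrors the shape of the removed scalar BlockReducer: accumulate a
// contiguous run of values starting at `index`, then finalize once.
template <typename Op>
class SimpleBlockReducer {
 public:
  explicit SimpleBlockReducer(const Op& op)
      : op_(op), accum_(op.initialize()) {}
  void Reduce(std::size_t index, std::size_t n, const float* data) {
    for (std::size_t i = 0; i < n; ++i) op_.reduce(data[index + i], &accum_);
  }
  float Finalize() const { return op_.finalize(accum_); }

 private:
  Op op_;
  float accum_;
};

int main() {
  const float data[] = {1.f, 2.f, 3.f, 4.f};
  SimpleBlockReducer<SumOp> reducer{SumOp{}};
  reducer.Reduce(0, 4, data);
  std::cout << reducer.Finalize() << "\n";  // prints 10
}

The vectorized specialization above followed the same pattern but peeled the run into PacketSize-wide chunks reduced with reducePacket, then merged the packet and scalar accumulators in finalizeBoth.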
@@ -993,73 +929,6 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
    return startInput;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void GetInputCoordsForOutputIndex(
      Index index,
      DSizes<Index, NumInputDims>* coords) const {
    for (int i = 0; i < NumInputDims; ++i) {
      (*coords)[i] = 0;
    }
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumOutputDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        (*coords)[m_output_to_input_dim_map[i]] = idx;
        index -= idx * m_outputStrides[i];
      }
      (*coords)[m_output_to_input_dim_map[0]] = index;
    } else {
      for (int i = 0; i < NumOutputDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        (*coords)[m_output_to_input_dim_map[i]] = idx;
        index -= idx * m_outputStrides[i];
      }
      (*coords)[m_output_to_input_dim_map[NumOutputDims - 1]] = index;
    }
  }
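  // Worked example (values assumed for illustration): a rank-3 input reduced
  // over dim 1, with output dims {4, 5} mapping to input dims {0, 2}. Column
  // major gives m_outputStrides = {1, 4}, so for output index 6 the loop
  // computes idx = 6 / 4 = 1 for input dim 2, leaves remainder 2 for input
  // dim 0, and the reduced dim stays pinned at 0: *coords = {2, 0, 1}.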
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void CalculateTargetInputBlockShape(
      const Index max_coeff_count,
      const DSizes<Index, NumInputDims>& input_slice_sizes,
      DSizes<Index, NumInputDims>* target_input_block_sizes) const {
    typedef internal::BlockReducer<Self, Op> BlockReducer;
    // TODO(andydavis) Compute reducer overhead correctly for the case where
    // we are preserving the inner most dimension, and a single reducer
    // reduces a packet's worth of output coefficients.
    const Index reducer_overhead = sizeof(BlockReducer) / sizeof(Scalar);

    Index coeff_to_allocate = max_coeff_count;
    bool first_preserved_dim_allocated = false;
    bool first_reduced_dim_allocated = false;
    for (int i = 0; i < NumInputDims; ++i) {
      const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
                          ? i
                          : NumInputDims - i - 1;
      (*target_input_block_sizes)[dim] = 1;
      if (m_reduced[dim]) {
        // TODO(andydavis) Consider allocating to multiple reduced dimensions.
        // Watch out for cases where reduced dimensions are not contiguous,
        // which induces scattered reads.
        if (!first_reduced_dim_allocated) {
          (*target_input_block_sizes)[dim] =
              numext::mini(input_slice_sizes[dim], coeff_to_allocate);
          coeff_to_allocate /= (*target_input_block_sizes)[dim];
          first_reduced_dim_allocated = true;
        }
      } else if (!first_preserved_dim_allocated) {
        // TODO(andydavis) Include output block size in this L1 working set
        // calculation.
        const Index alloc_size = numext::maxi(
            static_cast<Index>(1), coeff_to_allocate / reducer_overhead);
        (*target_input_block_sizes)[dim] =
            numext::mini(input_slice_sizes[dim], alloc_size);
        coeff_to_allocate = numext::maxi(
            static_cast<Index>(1),
            coeff_to_allocate /
                ((*target_input_block_sizes)[dim] * reducer_overhead));
        first_preserved_dim_allocated = true;
      }
    }
  }
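  // Worked example (values assumed for illustration): max_coeff_count = 1024,
  // reducer_overhead = 2, column major, input_slice_sizes = {32, 64, 8} with
  // dim 1 reduced. Dim 0 is the first preserved dim: alloc_size = 1024 / 2 =
  // 512, so it receives min(32, 512) = 32 and coeff_to_allocate drops to
  // 1024 / (32 * 2) = 16. Dim 1 is the first reduced dim and receives
  // min(64, 16) = 16; dim 2 stays at the default block size of 1.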
  // Bitmap indicating if an input dimension is reduced or not.
  array<bool, NumInputDims> m_reduced;
  // Dimensions of the output of the operation.