mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Improved the performance of tensor padding
This commit is contained in:
parent
58026905ae
commit
034aa3b2c0
@ -178,9 +178,14 @@ template <typename Device, typename T> class BenchmarkSuite {
|
|||||||
size_b[1] = m_;
|
size_b[1] = m_;
|
||||||
TensorMap<Tensor<T, 2>, Eigen::Aligned> B(b_, size_b);
|
TensorMap<Tensor<T, 2>, Eigen::Aligned> B(b_, size_b);
|
||||||
|
|
||||||
|
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||||
|
Eigen::IndexPairList<Eigen::type2indexpair<0, 0>,
|
||||||
|
Eigen::type2indexpair<2, 1> > paddings;
|
||||||
|
#else
|
||||||
Eigen::array<Eigen::IndexPair<TensorIndex>, 2> paddings;
|
Eigen::array<Eigen::IndexPair<TensorIndex>, 2> paddings;
|
||||||
paddings[0] = Eigen::IndexPair<TensorIndex>(0, 0);
|
paddings[0] = Eigen::IndexPair<TensorIndex>(0, 0);
|
||||||
paddings[1] = Eigen::IndexPair<TensorIndex>(2, 1);
|
paddings[1] = Eigen::IndexPair<TensorIndex>(2, 1);
|
||||||
|
#endif
|
||||||
|
|
||||||
StartBenchmarkTiming();
|
StartBenchmarkTiming();
|
||||||
for (int iter = 0; iter < num_iters; ++iter) {
|
for (int iter = 0; iter < num_iters; ++iter) {
|
||||||
|
@ -150,27 +150,26 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
for (int i = NumDims - 1; i > 0; --i) {
|
for (int i = NumDims - 1; i > 0; --i) {
|
||||||
const Index idx = index / m_outputStrides[i];
|
const Index idx = index / m_outputStrides[i];
|
||||||
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
if (isPaddingAtIndexForDim(idx, i)) {
|
||||||
return m_paddingValue;
|
return m_paddingValue;
|
||||||
}
|
}
|
||||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||||
index -= idx * m_outputStrides[i];
|
index -= idx * m_outputStrides[i];
|
||||||
}
|
}
|
||||||
if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) {
|
if (isPaddingAtIndexForDim(index, 0)) {
|
||||||
return m_paddingValue;
|
return m_paddingValue;
|
||||||
}
|
}
|
||||||
inputIndex += (index - m_padding[0].first);
|
inputIndex += (index - m_padding[0].first);
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < NumDims - 1; ++i) {
|
for (int i = 0; i < NumDims - 1; ++i) {
|
||||||
const Index idx = index / m_outputStrides[i+1];
|
const Index idx = index / m_outputStrides[i+1];
|
||||||
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
if (isPaddingAtIndexForDim(idx, i)) {
|
||||||
return m_paddingValue;
|
return m_paddingValue;
|
||||||
}
|
}
|
||||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||||
index -= idx * m_outputStrides[i+1];
|
index -= idx * m_outputStrides[i+1];
|
||||||
}
|
}
|
||||||
if (index < m_padding[NumDims-1].first ||
|
if (isPaddingAtIndexForDim(index, NumDims-1)) {
|
||||||
index >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) {
|
|
||||||
return m_paddingValue;
|
return m_paddingValue;
|
||||||
}
|
}
|
||||||
inputIndex += (index - m_padding[NumDims-1].first);
|
inputIndex += (index - m_padding[NumDims-1].first);
|
||||||
@ -187,43 +186,6 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
return packetRowMajor(index);
|
return packetRowMajor(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const
|
|
||||||
{
|
|
||||||
Index inputIndex;
|
|
||||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
|
||||||
{
|
|
||||||
const Index idx = coords[0];
|
|
||||||
if (idx < m_padding[0].first || idx >= m_dimensions[0] - m_padding[0].second) {
|
|
||||||
return m_paddingValue;
|
|
||||||
}
|
|
||||||
inputIndex = idx - m_padding[0].first;
|
|
||||||
}
|
|
||||||
for (int i = 1; i < NumDims; ++i) {
|
|
||||||
const Index idx = coords[i];
|
|
||||||
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
|
||||||
return m_paddingValue;
|
|
||||||
}
|
|
||||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
{
|
|
||||||
const Index idx = coords[NumDims-1];
|
|
||||||
if (idx < m_padding[NumDims-1].first || idx >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) {
|
|
||||||
return m_paddingValue;
|
|
||||||
}
|
|
||||||
inputIndex = idx - m_padding[NumDims-1].first;
|
|
||||||
}
|
|
||||||
for (int i = NumDims - 2; i >= 0; --i) {
|
|
||||||
const Index idx = coords[i];
|
|
||||||
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
|
||||||
return m_paddingValue;
|
|
||||||
}
|
|
||||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return m_impl.coeff(inputIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
|
||||||
TensorOpCost cost = m_impl.costPerCoeff(vectorized);
|
TensorOpCost cost = m_impl.costPerCoeff(vectorized);
|
||||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
@ -239,6 +201,38 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
|
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
|
||||||
|
Index index, int dim_index) const {
|
||||||
|
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||||
|
return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
|
||||||
|
index < m_padding[dim_index].first) ||
|
||||||
|
(!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
|
||||||
|
index >= m_dimensions[dim_index] - m_padding[dim_index].second);
|
||||||
|
#else
|
||||||
|
return (index < m_padding[dim_index].first) ||
|
||||||
|
(index >= m_dimensions[dim_index] - m_padding[dim_index].second);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
|
||||||
|
int dim_index) const {
|
||||||
|
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||||
|
return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
|
||||||
|
#else
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
|
||||||
|
int dim_index) const {
|
||||||
|
#if defined(EIGEN_HAS_INDEX_LIST)
|
||||||
|
return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
|
||||||
|
#else
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
|
void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
|
||||||
const double in = static_cast<double>(m_impl.dimensions()[i]);
|
const double in = static_cast<double>(m_impl.dimensions()[i]);
|
||||||
const double out = in + m_padding[i].first + m_padding[i].second;
|
const double out = in + m_padding[i].first + m_padding[i].second;
|
||||||
@ -273,15 +267,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
|
const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
|
||||||
const Index lastPaddedRight = m_outputStrides[i+1];
|
const Index lastPaddedRight = m_outputStrides[i+1];
|
||||||
|
|
||||||
if (last < lastPaddedLeft) {
|
if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) {
|
||||||
// all the coefficient are in the padding zone.
|
// all the coefficient are in the padding zone.
|
||||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||||
}
|
}
|
||||||
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) {
|
||||||
// all the coefficient are in the padding zone.
|
// all the coefficient are in the padding zone.
|
||||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||||
}
|
}
|
||||||
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
|
||||||
// all the coefficient are between the 2 padding zones.
|
// all the coefficient are between the 2 padding zones.
|
||||||
const Index idx = index / m_outputStrides[i];
|
const Index idx = index / m_outputStrides[i];
|
||||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||||
@ -299,15 +293,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
|
const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
|
||||||
const Index lastPaddedRight = m_outputStrides[1];
|
const Index lastPaddedRight = m_outputStrides[1];
|
||||||
|
|
||||||
if (last < lastPaddedLeft) {
|
if (!isLeftPaddingCompileTimeZero(0) && last < lastPaddedLeft) {
|
||||||
// all the coefficient are in the padding zone.
|
// all the coefficient are in the padding zone.
|
||||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||||
}
|
}
|
||||||
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
else if (!isRightPaddingCompileTimeZero(0) && first >= firstPaddedRight && last < lastPaddedRight) {
|
||||||
// all the coefficient are in the padding zone.
|
// all the coefficient are in the padding zone.
|
||||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||||
}
|
}
|
||||||
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
|
||||||
// all the coefficient are between the 2 padding zones.
|
// all the coefficient are between the 2 padding zones.
|
||||||
inputIndex += (index - m_padding[0].first);
|
inputIndex += (index - m_padding[0].first);
|
||||||
return m_impl.template packet<Unaligned>(inputIndex);
|
return m_impl.template packet<Unaligned>(inputIndex);
|
||||||
@ -331,15 +325,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
|
const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
|
||||||
const Index lastPaddedRight = m_outputStrides[i];
|
const Index lastPaddedRight = m_outputStrides[i];
|
||||||
|
|
||||||
if (last < lastPaddedLeft) {
|
if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) {
|
||||||
// all the coefficient are in the padding zone.
|
// all the coefficient are in the padding zone.
|
||||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||||
}
|
}
|
||||||
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) {
|
||||||
// all the coefficient are in the padding zone.
|
// all the coefficient are in the padding zone.
|
||||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||||
}
|
}
|
||||||
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
|
||||||
// all the coefficient are between the 2 padding zones.
|
// all the coefficient are between the 2 padding zones.
|
||||||
const Index idx = index / m_outputStrides[i+1];
|
const Index idx = index / m_outputStrides[i+1];
|
||||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||||
@ -357,15 +351,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
|
const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
|
||||||
const Index lastPaddedRight = m_outputStrides[NumDims-1];
|
const Index lastPaddedRight = m_outputStrides[NumDims-1];
|
||||||
|
|
||||||
if (last < lastPaddedLeft) {
|
if (!isLeftPaddingCompileTimeZero(NumDims-1) && last < lastPaddedLeft) {
|
||||||
// all the coefficient are in the padding zone.
|
// all the coefficient are in the padding zone.
|
||||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||||
}
|
}
|
||||||
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
else if (!isRightPaddingCompileTimeZero(NumDims-1) && first >= firstPaddedRight && last < lastPaddedRight) {
|
||||||
// all the coefficient are in the padding zone.
|
// all the coefficient are in the padding zone.
|
||||||
return internal::pset1<PacketReturnType>(m_paddingValue);
|
return internal::pset1<PacketReturnType>(m_paddingValue);
|
||||||
}
|
}
|
||||||
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
|
||||||
// all the coefficient are between the 2 padding zones.
|
// all the coefficient are between the 2 padding zones.
|
||||||
inputIndex += (index - m_padding[NumDims-1].first);
|
inputIndex += (index - m_padding[NumDims-1].first);
|
||||||
return m_impl.template packet<Unaligned>(inputIndex);
|
return m_impl.template packet<Unaligned>(inputIndex);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user