mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Optimized the tensor padding code.
This commit is contained in:
parent
36fffe48f7
commit
2959045f2f
@ -87,7 +87,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/false,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
};
|
};
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||||
@ -100,15 +100,13 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
}
|
}
|
||||||
|
|
||||||
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
||||||
for (int i = 0; i < NumDims; ++i) {
|
|
||||||
if (i > 0) {
|
|
||||||
m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
|
|
||||||
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
|
|
||||||
} else {
|
|
||||||
m_inputStrides[0] = 1;
|
m_inputStrides[0] = 1;
|
||||||
m_outputStrides[0] = 1;
|
m_outputStrides[0] = 1;
|
||||||
|
for (int i = 1; i < NumDims; ++i) {
|
||||||
|
m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
|
||||||
|
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
|
||||||
}
|
}
|
||||||
}
|
m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef typename XprType::Scalar Scalar;
|
typedef typename XprType::Scalar Scalar;
|
||||||
@ -128,7 +126,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
|
||||||
{
|
{
|
||||||
Index inputIndex = 0;
|
Index inputIndex = 0;
|
||||||
for (int i = NumDims - 1; i >= 0; --i) {
|
for (int i = NumDims - 1; i > 0; --i) {
|
||||||
const Index idx = index / m_outputStrides[i];
|
const Index idx = index / m_outputStrides[i];
|
||||||
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
|
||||||
return Scalar(0);
|
return Scalar(0);
|
||||||
@ -136,21 +134,90 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||||
index -= idx * m_outputStrides[i];
|
index -= idx * m_outputStrides[i];
|
||||||
}
|
}
|
||||||
|
if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) {
|
||||||
|
return Scalar(0);
|
||||||
|
}
|
||||||
|
inputIndex += (index - m_padding[0].first);
|
||||||
return m_impl.coeff(inputIndex);
|
return m_impl.coeff(inputIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* template<int LoadMode>
|
template<int LoadMode>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||||
{
|
{
|
||||||
return m_impl.template packet<LoadMode>(index);
|
static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||||
}*/
|
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||||
|
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
|
||||||
|
|
||||||
|
const Index initialIndex = index;
|
||||||
|
Index inputIndex = 0;
|
||||||
|
for (int i = NumDims - 1; i > 0; --i) {
|
||||||
|
const int first = index;
|
||||||
|
const int last = index + packetSize - 1;
|
||||||
|
const int lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
|
||||||
|
const int firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
|
||||||
|
const int lastPaddedRight = m_outputStrides[i+1];
|
||||||
|
|
||||||
|
if (last < lastPaddedLeft) {
|
||||||
|
// all the coefficient are in the padding zone.
|
||||||
|
return internal::pset1<PacketReturnType>(Scalar(0));
|
||||||
|
}
|
||||||
|
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
||||||
|
// all the coefficient are in the padding zone.
|
||||||
|
return internal::pset1<PacketReturnType>(Scalar(0));
|
||||||
|
}
|
||||||
|
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
||||||
|
// all the coefficient are between the 2 padding zones.
|
||||||
|
const Index idx = index / m_outputStrides[i];
|
||||||
|
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
|
||||||
|
index -= idx * m_outputStrides[i];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Every other case
|
||||||
|
return packetWithPossibleZero(initialIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const Index last = index + packetSize - 1;
|
||||||
|
const Index first = index;
|
||||||
|
const int lastPaddedLeft = m_padding[0].first;
|
||||||
|
const int firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
|
||||||
|
const int lastPaddedRight = m_outputStrides[1];
|
||||||
|
|
||||||
|
if (last < lastPaddedLeft) {
|
||||||
|
// all the coefficient are in the padding zone.
|
||||||
|
return internal::pset1<PacketReturnType>(Scalar(0));
|
||||||
|
}
|
||||||
|
else if (first >= firstPaddedRight && last < lastPaddedRight) {
|
||||||
|
// all the coefficient are in the padding zone.
|
||||||
|
return internal::pset1<PacketReturnType>(Scalar(0));
|
||||||
|
}
|
||||||
|
else if (first >= lastPaddedLeft && last < firstPaddedRight) {
|
||||||
|
// all the coefficient are between the 2 padding zones.
|
||||||
|
inputIndex += (index - m_padding[0].first);
|
||||||
|
return m_impl.template packet<Unaligned>(inputIndex);
|
||||||
|
}
|
||||||
|
// Every other case
|
||||||
|
return packetWithPossibleZero(initialIndex);
|
||||||
|
}
|
||||||
|
|
||||||
Scalar* data() const { return NULL; }
|
Scalar* data() const { return NULL; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
// Slow-path packet builder: evaluates coeff() for each of the packetSize
// consecutive linear indices starting at `index`, stores the results in an
// aligned scratch array and loads that array as a single packet.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
{
  static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename internal::remove_const<CoeffReturnType>::type MutableCoeff;

  // Scratch storage carries the default alignment so pload can read it.
  EIGEN_ALIGN_DEFAULT MutableCoeff lanes[packetSize];
  for (int lane = 0; lane != packetSize; ++lane) {
    lanes[lane] = coeff(index + lane);
  }
  return internal::pload<PacketReturnType>(lanes);
}
|
||||||
|
|
||||||
PaddingDimensions m_padding;
|
PaddingDimensions m_padding;
|
||||||
Dimensions m_dimensions;
|
Dimensions m_dimensions;
|
||||||
array<Index, NumDims> m_outputStrides;
|
array<Index, NumDims+1> m_outputStrides;
|
||||||
array<Index, NumDims> m_inputStrides;
|
array<Index, NumDims> m_inputStrides;
|
||||||
TensorEvaluator<ArgType, Device> m_impl;
|
TensorEvaluator<ArgType, Device> m_impl;
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user