mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
Use fast divisors in TensorGeneratorOp
This commit is contained in:
parent
b0d406d91c
commit
694084ecbd
@ -98,6 +98,8 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
||||
RawAccess = false
|
||||
};
|
||||
|
||||
// Divisor type over Index; the evaluator stores one per stride in m_fast_strides
// and uses it as the right-hand side of `index / ...` in extract_coordinates().
typedef internal::TensorIntDivisor<Index> IndexDivisor;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||
: m_generator(op.generator())
|
||||
#ifdef EIGEN_USE_SYCL
|
||||
@ -118,6 +120,9 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
||||
m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < NumDims; ++i) {
|
||||
m_fast_strides[i] = IndexDivisor(m_strides[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a const reference to the evaluator's stored m_dimensions.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||
@ -150,6 +155,8 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
||||
return rslt;
|
||||
}
|
||||
|
||||
// TODO(ezhulenev): Add tiled evaluation support.
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
|
||||
costPerCoeff(bool) const {
|
||||
// TODO(rmlarsen): This is just a placeholder. Define interface to make
|
||||
@ -170,14 +177,14 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
||||
void extract_coordinates(Index index, array<Index, NumDims>& coords) const {
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
for (int i = NumDims - 1; i > 0; --i) {
|
||||
const Index idx = index / m_strides[i];
|
||||
const Index idx = index / m_fast_strides[i];
|
||||
index -= idx * m_strides[i];
|
||||
coords[i] = idx;
|
||||
}
|
||||
coords[0] = index;
|
||||
} else {
|
||||
for (int i = 0; i < NumDims - 1; ++i) {
|
||||
const Index idx = index / m_strides[i];
|
||||
const Index idx = index / m_fast_strides[i];
|
||||
index -= idx * m_strides[i];
|
||||
coords[i] = idx;
|
||||
}
|
||||
@ -187,6 +194,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
||||
|
||||
// Value returned by dimensions(); the constructor also reads it when deriving m_strides.
Dimensions m_dimensions;
|
||||
// Per-dimension strides; extract_coordinates() multiplies each coordinate by its
// stride to subtract the consumed part of the linear index.
array<Index, NumDims> m_strides;
|
||||
// One IndexDivisor per entry of m_strides, built in the constructor and used as
// the divisor in extract_coordinates().
array<IndexDivisor, NumDims> m_fast_strides;
|
||||
// Copy of the expression's generator, initialised from op.generator().
Generator m_generator;
|
||||
#ifdef EIGEN_USE_SYCL
|
||||
TensorEvaluator<ArgType, Device> m_argImpl;
|
||||
|
Loading…
x
Reference in New Issue
Block a user