mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-02 00:34:14 +08:00
Refined the cost of the striding operation.
This commit is contained in:
parent
5c372d19e3
commit
fd9401f260
@ -209,11 +209,17 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
|
|||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
|
||||||
const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
|
double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
|
||||||
2 * TensorOpCost::MulCost<Index>() +
|
TensorOpCost::MulCost<Index>() +
|
||||||
TensorOpCost::DivCost<Index>());
|
TensorOpCost::DivCost<Index>()) +
|
||||||
return m_impl.costPerCoeff(vectorized) +
|
TensorOpCost::MulCost<Index>();
|
||||||
TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
|
if (vectorized) {
|
||||||
|
compute_cost *= 2; // packet() computes two indices
|
||||||
|
}
|
||||||
|
const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
|
||||||
|
return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
|
||||||
|
// Computation is not vectorized per se, but it is done once per packet.
|
||||||
|
TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
|
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user