Tensor block evaluation cost model
This commit is contained in:
parent 72166d0e6e
commit ae07801dd8
@@ -73,14 +73,68 @@ EIGEN_STRONG_INLINE DSizes<std::ptrdiff_t, sizeof...(Indices)> strides(
 enum class TensorBlockShapeType { kUniformAllDims, kSkewedInnerDims };
 
 struct TensorBlockResourceRequirements {
-  TensorBlockShapeType shape_type;
-  size_t size;
+  TensorBlockShapeType shape_type;  // target block shape
+  size_t size;                      // target block size
+  TensorOpCost cost_per_coeff;      // cost of computing a single block element
+
+  template <typename Scalar>
+  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements withShapeAndSize(
+      TensorBlockShapeType shape_type, size_t size_in_bytes,
+      TensorOpCost cost) {
+    const size_t size = numext::maxi(size_t(1), size_in_bytes / sizeof(Scalar));
+    return {shape_type, size, cost};
+  }
+
+  template <typename Scalar>
+  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements withShapeAndSize(
+      TensorBlockShapeType shape_type, size_t size_in_bytes) {
+    // This default cost per coefficient is valid for most materialized tensor
+    // block evaluation implementations, because they typically just read
+    // coefficients from the underlying tensor storage and write to the tensor
+    // block buffer (scratch or destination memory); reads and writes have a
+    // linear access pattern. We ignore the fixed cost of block evaluation,
+    // because in practice it should be negligible.
+    //
+    // Lazy block evaluation adds the cost of calling a functor for each
+    // coefficient.
+    //
+    // All non-trivial block evaluation implementations must provide their own
+    // cost approximation (e.g. shuffling the inner dimension has a much higher
+    // cost because it reads memory randomly, although the total number of
+    // moved bytes is the same).
+    return withShapeAndSize<Scalar>(shape_type, size_in_bytes,
+                                    {/*bytes_loaded=*/sizeof(Scalar),
+                                     /*bytes_stored=*/sizeof(Scalar),
+                                     /*compute_cycles=*/0});
+  }
+
+  template <typename Scalar>
+  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements skewed(
+      size_t size_in_bytes) {
+    return withShapeAndSize<Scalar>(TensorBlockShapeType::kSkewedInnerDims,
+                                    size_in_bytes);
+  }
+
+  template <typename Scalar>
+  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements uniform(
+      size_t size_in_bytes) {
+    return withShapeAndSize<Scalar>(TensorBlockShapeType::kUniformAllDims,
+                                    size_in_bytes);
+  }
 
   EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE TensorBlockResourceRequirements
-  merge(const TensorBlockResourceRequirements &lhs,
-        const TensorBlockResourceRequirements &rhs) {
-    return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)};
+  merge(const TensorBlockResourceRequirements& lhs,
+        const TensorBlockResourceRequirements& rhs) {
+    return {merge(lhs.shape_type, rhs.shape_type),           // shape_type
+            merge(lhs.size, rhs.size),                       // size
+            merge(lhs.cost_per_coeff, rhs.cost_per_coeff)};  // cost_per_coeff
   }
 
+  EIGEN_DEVICE_FUNC TensorBlockResourceRequirements& addCostPerCoeff(
+      TensorOpCost cost) {
+    cost_per_coeff += cost;
+    return *this;
+  }
+
   // This is a resource requirement that should be returned from expressions

@@ -88,10 +142,10 @@ struct TensorBlockResourceRequirements {
   // expression with raw buffer access).
   EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE TensorBlockResourceRequirements any() {
-    return {TensorBlockShapeType::kUniformAllDims, 1};
+    return {TensorBlockShapeType::kUniformAllDims, 1, {0, 0, 0}};
   }
 
  private:
  using Requirements = TensorBlockResourceRequirements;
 
  EIGEN_DEVICE_FUNC

@@ -100,13 +154,19 @@ private:
   }
 
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE TensorBlockShapeType merge(TensorBlockShapeType lhs,
-                                                        TensorBlockShapeType rhs) {
+  static EIGEN_STRONG_INLINE TensorBlockShapeType
+  merge(TensorBlockShapeType lhs, TensorBlockShapeType rhs) {
     return (lhs == TensorBlockShapeType::kSkewedInnerDims ||
             rhs == TensorBlockShapeType::kSkewedInnerDims)
                ? TensorBlockShapeType::kSkewedInnerDims
                : TensorBlockShapeType::kUniformAllDims;
   }
 
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE TensorOpCost merge(TensorOpCost lhs_cost,
+                                                TensorOpCost rhs_cost) {
+    return lhs_cost + rhs_cost;
+  }
 };
 
 // -------------------------------------------------------------------------- //
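Taken together, the new struct gives every block-capable expression a uniform way to describe both how big its blocks should be and how expensive each block coefficient is to produce. Below is a minimal usage sketch, not part of the patch, assuming the unsupported Tensor module is included and the internal:: names above are unchanged; kL1Bytes and kFunctorCycles are illustrative values, not Eigen constants.

#include <unsupported/Eigen/CXX11/Tensor>
#include <cstddef>

namespace sketch {
using Eigen::TensorOpCost;
using Eigen::internal::TensorBlockResourceRequirements;

// Hypothetical evaluator: ask for inner-dim-skewed blocks sized to an assumed
// 32KB L1 target, then charge extra compute cycles for a pointwise functor on
// top of the default one-load/one-store cost per coefficient.
inline TensorBlockResourceRequirements exampleRequirements() {
  const std::size_t kL1Bytes = 32 * 1024;  // illustrative cache target
  const double kFunctorCycles = 5.0;       // illustrative per-coefficient cost
  return TensorBlockResourceRequirements::skewed<float>(kL1Bytes)
      .addCostPerCoeff({/*bytes_loaded=*/0, /*bytes_stored=*/0,
                        /*compute_cycles=*/kFunctorCycles});
}

// Requirements from two sub-expressions combine with merge(): the shape
// prefers kSkewedInnerDims if either side asks for it, and costs are added.
inline TensorBlockResourceRequirements combine(
    const TensorBlockResourceRequirements& lhs,
    const TensorBlockResourceRequirements& rhs) {
  return TensorBlockResourceRequirements::merge(lhs, rhs);
}
}  // namespace sketch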
@@ -131,8 +191,9 @@ class TensorBlockDescriptor {
   class DestinationBuffer {
    public:
     enum DestinationBufferKind : int {
-      // The above explicit specification of "int" as the enum basetype is needed
-      // to get around a HIPCC link error ("the field type is not amp-compatible")
+      // The above explicit specification of "int" as the enum basetype is
+      // needed to get around a HIPCC link error ("the field type is not
+      // amp-compatible")
       // which is issued for class members with the enum type.
       // TODO(rocm):
       // remove the "int" basetype once HIPCC has been fixed to not error out

@@ -280,7 +341,7 @@ class TensorBlockMapper {
 
   TensorBlockMapper() = default;
   TensorBlockMapper(const DSizes<IndexType, NumDims>& dimensions,
                     const TensorBlockResourceRequirements& requirements)
       : m_tensor_dimensions(dimensions), m_requirements(requirements) {
     // Compute block dimensions and the total number of blocks.
     InitializeBlockDimensions();

@@ -299,8 +360,8 @@ class TensorBlockMapper {
     return m_block_dimensions;
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  BlockDescriptor blockDescriptor(IndexType block_index) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockDescriptor
+  blockDescriptor(IndexType block_index) const {
     static const bool isColMajor = Layout == static_cast<int>(ColMajor);
 
     IndexType offset = 0;

@@ -416,7 +477,7 @@ class TensorBlockMapper {
 
     eigen_assert(m_block_dimensions.TotalSize() >=
                  numext::mini<IndexType>(target_block_size,
                                          m_tensor_dimensions.TotalSize()));
 
     // Calculate block counts by dimension and total block count.
     DSizes<IndexType, NumDims> block_count;

@@ -761,7 +822,6 @@ class TensorMaterializedBlock {
 
 template <typename UnaryOp, typename ArgTensorBlock>
 class TensorCwiseUnaryBlock {
-
   static const bool NoArgBlockAccess =
       internal::is_void<typename ArgTensorBlock::XprType>::value;
 

@@ -793,7 +853,6 @@ class TensorCwiseUnaryBlock {
 
 template <typename BinaryOp, typename LhsTensorBlock, typename RhsTensorBlock>
 class TensorCwiseBinaryBlock {
-
   static const bool NoArgBlockAccess =
       internal::is_void<typename LhsTensorBlock::XprType>::value ||
       internal::is_void<typename RhsTensorBlock::XprType>::value;

@@ -840,7 +899,6 @@ class TensorCwiseBinaryBlock {
 
 template <typename BlockFactory, typename ArgTensorBlock>
 class TensorUnaryExprBlock {
-
   typedef typename ArgTensorBlock::XprType ArgXprType;
   static const bool NoArgBlockAccess = internal::is_void<ArgXprType>::value;
 

@@ -872,7 +930,6 @@ class TensorUnaryExprBlock {
 template <typename BlockFactory, typename Arg1TensorBlock,
           typename Arg2TensorBlock, typename Arg3TensorBlock>
 class TensorTernaryExprBlock {
-
   typedef typename Arg1TensorBlock::XprType Arg1XprType;
   typedef typename Arg2TensorBlock::XprType Arg2XprType;
   typedef typename Arg3TensorBlock::XprType Arg3XprType;
@@ -620,12 +620,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
     // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
     // tensors. But this might need further tuning.
-    const size_t target_block_size = numext::maxi<size_t>(
-        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-
+    const size_t target_size = m_device.firstLevelCacheSize();
     return internal::TensorBlockResourceRequirements::merge(
-        {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
-        m_impl.getResourceRequirements());
+        m_impl.getResourceRequirements(),
+        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock

@@ -296,11 +296,9 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    const size_t target_block_size =
-        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-
+    const size_t target_size = m_device.lastLevelCacheSize();
     return internal::TensorBlockResourceRequirements::merge(
-        {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
+        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
         m_impl.getResourceRequirements());
   }
 
@@ -521,7 +521,9 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    return m_argImpl.getResourceRequirements();
+    static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
+    return m_argImpl.getResourceRequirements().addCostPerCoeff(
+        {0, 0, functor_cost / PacketSize});
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock

@@ -654,9 +656,11 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
+    static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
     return internal::TensorBlockResourceRequirements::merge(
-        m_leftImpl.getResourceRequirements(),
-        m_rightImpl.getResourceRequirements());
+               m_leftImpl.getResourceRequirements(),
+               m_rightImpl.getResourceRequirements())
+        .addCostPerCoeff({0, 0, functor_cost / PacketSize});
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
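In both hunks the added cost is just the functor's static cost estimate divided by the packet size, expressed as compute cycles only, since the default block cost already covers the linear loads and stores. A standalone sketch of that calculation; the helper name is hypothetical, and functor_traits / packet_traits are Eigen internals whose exact use here is an assumption:

#include <unsupported/Eigen/CXX11/Tensor>

// Sketch: per-coefficient cost of applying a cwise functor, the quantity the
// evaluators above pass to addCostPerCoeff().
template <typename Functor, typename Scalar>
Eigen::TensorOpCost functorCostPerCoeff() {
  const double functor_cost = Eigen::internal::functor_traits<Functor>::Cost;
  const int packet_size = Eigen::internal::unpacket_traits<
      typename Eigen::internal::packet_traits<Scalar>::type>::size;
  // Compute cycles only: block materialization already accounts for the
  // one load and one store per coefficient.
  return {/*bytes_loaded=*/0, /*bytes_stored=*/0,
          /*compute_cycles=*/functor_cost / packet_size};
}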
@@ -934,11 +938,16 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
+    auto then_req = m_thenImpl.getResourceRequirements();
+    auto else_req = m_elseImpl.getResourceRequirements();
+
+    auto merged_req =
+        internal::TensorBlockResourceRequirements::merge(then_req, else_req);
+    merged_req.cost_per_coeff =
+        then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);
+
     return internal::TensorBlockResourceRequirements::merge(
-        m_condImpl.getResourceRequirements(),
-        internal::TensorBlockResourceRequirements::merge(
-            m_thenImpl.getResourceRequirements(),
-            m_elseImpl.getResourceRequirements()));
+        m_condImpl.getResourceRequirements(), merged_req);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
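Note the asymmetry: the then/else branch costs are combined with cwiseMax rather than added, so a block is not billed as if both branches were fully evaluated for every coefficient, while the condition's cost is still added on top by the outer merge. Roughly, as a sketch (assuming TensorOpCost behaves as in TensorCostModel.h, with cwiseMax and operator+ as used in this patch):

#include <unsupported/Eigen/CXX11/Tensor>

// Sketch: per-coefficient cost of a select-like expression. cwiseMax takes the
// field-wise maximum of two costs; operator+ adds them (as merge() does).
inline Eigen::TensorOpCost selectCostPerCoeff(
    const Eigen::TensorOpCost& cond_cost, const Eigen::TensorOpCost& then_cost,
    const Eigen::TensorOpCost& else_cost) {
  return cond_cost + then_cost.cwiseMax(else_cost);
}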
@@ -245,8 +245,8 @@ TensorExecutorTilingContext<TensorBlockMapper> GetTensorExecutorTilingContext(
       evaluator.getResourceRequirements();
 
   // Update target block size based on cost model.
-  TensorOpCost cost = evaluator.costPerCoeff(Vectorizable);
-  double taskSize = TensorCostModel<ThreadPoolDevice>::taskSize(1, cost);
+  double taskSize = TensorCostModel<ThreadPoolDevice>::taskSize(
+      1, requirements.cost_per_coeff);
   requirements.size = static_cast<size_t>(1.0 / taskSize);
 
   TensorBlockMapper block_mapper(

@@ -259,7 +259,8 @@ TensorExecutorTilingContext<TensorBlockMapper> GetTensorExecutorTilingContext(
       align *
       divup<size_t>(block_size * sizeof(typename Evaluator::Scalar), align);
 
-  return {block_mapper, cost * block_size, aligned_blocksize};
+  return {block_mapper, requirements.cost_per_coeff * block_size,
+          aligned_blocksize};
 }
 
 template <typename Evaluator, typename StorageIndex, bool Vectorizable>
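The executor thus sizes blocks from the expression's own cost_per_coeff instead of evaluator.costPerCoeff(): taskSize(1, cost) estimates what fraction of a reasonably sized parallel task one coefficient represents, so its reciprocal is the number of coefficients to put in a block. A simplified, self-contained sketch of that heuristic; SimpleCost and kTargetTaskCycles are stand-ins invented for the example, not Eigen's actual cost-model constants:

#include <algorithm>
#include <cstddef>

struct SimpleCost {
  double bytes_loaded, bytes_stored, compute_cycles;
  double totalCycles() const {
    // Pretend each byte moved costs one cycle; the weights are illustrative.
    return bytes_loaded + bytes_stored + compute_cycles;
  }
};

// Fraction of a "reasonable" parallel task represented by n coefficients.
inline double taskSize(double n, const SimpleCost& cost_per_coeff) {
  const double kTargetTaskCycles = 100000.0;
  return n * cost_per_coeff.totalCycles() / kTargetTaskCycles;
}

// Cheap coefficients -> larger blocks; expensive coefficients -> smaller ones.
inline std::size_t targetBlockSize(const SimpleCost& cost_per_coeff) {
  return std::max<std::size_t>(
      1, static_cast<std::size_t>(1.0 / taskSize(1, cost_per_coeff)));
}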
@@ -166,10 +166,10 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    const size_t target_block_size = numext::maxi<size_t>(
-        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-    return {internal::TensorBlockShapeType::kSkewedInnerDims,
-            target_block_size};
+    const size_t target_size = m_device.firstLevelCacheSize();
+    // TODO(ezhulenev): Generator should have a cost.
+    return internal::TensorBlockResourceRequirements::skewed<Scalar>(
+        target_size);
   }
 
   struct BlockIteratorState {
@@ -634,10 +634,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    const size_t target_block_size =
-        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+    const size_t target_size = m_device.lastLevelCacheSize();
     return internal::TensorBlockResourceRequirements::merge(
-        {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
+        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
         m_impl.getResourceRequirements());
   }
 

@@ -229,10 +229,9 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    const size_t target_block_size =
-        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+    const size_t target_size = m_device.lastLevelCacheSize();
     return internal::TensorBlockResourceRequirements::merge(
-        {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
+        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
         m_impl.getResourceRequirements());
   }
 
@@ -246,10 +246,12 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    const size_t target_block_size =
-        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-    return {internal::TensorBlockShapeType::kSkewedInnerDims,
-            target_block_size};
+    const size_t target_size = m_device.lastLevelCacheSize();
+    // Block evaluation reads the underlying memory in reverse order, and the
+    // default cost model does not properly capture this in bytes stored/loaded.
+    return internal::TensorBlockResourceRequirements::skewed<Scalar>(
+               target_size)
+        .addCostPerCoeff({0, 0, 24});
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
@@ -249,14 +249,21 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     static const int inner_dim =
         Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
 
-    const size_t target_block_size = numext::maxi<size_t>(
-        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-
+    const size_t target_size = m_device.firstLevelCacheSize();
     const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
-    return {inner_dim_shuffled
-                ? internal::TensorBlockShapeType::kUniformAllDims
-                : internal::TensorBlockShapeType::kSkewedInnerDims,
-            target_block_size};
+
+    // A shuffled inner dimension leads to random memory access, which is not
+    // captured by the default cost model's bytes loaded/stored, so we add this
+    // cost explicitly. The number of cycles was picked based on benchmarks.
+    // TODO(ezhulenev): This number was picked based on very questionable
+    // benchmarks; add benchmarks that are representative of real workloads.
+    using BlockRequirements = internal::TensorBlockResourceRequirements;
+    if (inner_dim_shuffled) {
+      return BlockRequirements::uniform<Scalar>(target_size)
+          .addCostPerCoeff({0, 0, NumDims * 28});
+    } else {
+      return BlockRequirements::skewed<Scalar>(target_size);
+    }
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
@@ -21,6 +21,7 @@ using Eigen::RowMajor;
 using Eigen::ColMajor;
 using Eigen::internal::TensorBlockShapeType;
 
+static TensorOpCost zeroCost() { return {0, 0, 0}; }
 
 template<typename T>
 static const T& choose(int layout, const T& col, const T& row) {

@@ -73,7 +74,7 @@ static void test_block_mapper_sanity()
 
   // Test uniform blocks.
   TensorBlockMapper uniform_block_mapper(
-      tensor_dims, {TensorBlockShapeType::kUniformAllDims, 100});
+      tensor_dims, {TensorBlockShapeType::kUniformAllDims, 100, zeroCost()});
 
   VERIFY_IS_EQUAL(uniform_block_mapper.blockCount(), 100);
   VERIFY_IS_EQUAL(uniform_block_mapper.blockTotalSize(), 100);

@@ -85,7 +86,7 @@ static void test_block_mapper_sanity()
 
   // Test skewed to inner dims blocks.
   TensorBlockMapper skewed_block_mapper(
-      tensor_dims, {TensorBlockShapeType::kSkewedInnerDims, 100});
+      tensor_dims, {TensorBlockShapeType::kSkewedInnerDims, 100, zeroCost()});
 
   VERIFY_IS_EQUAL(skewed_block_mapper.blockCount(), 100);
   VERIFY_IS_EQUAL(skewed_block_mapper.blockTotalSize(), 100);

@@ -130,7 +131,8 @@ static void test_block_mapper_maps_every_element() {
   std::set<Index> coeff_set;
 
   // Try different combinations of block types and sizes.
-  TensorBlockMapper block_mapper(dims, {RandomShape(), RandomTargetSize(dims)});
+  TensorBlockMapper block_mapper(
+      dims, {RandomShape(), RandomTargetSize(dims), zeroCost()});
 
   for (int i = 0; i < block_mapper.blockCount(); ++i) {
     auto block = block_mapper.blockDescriptor(i);
@@ -233,9 +235,8 @@ static void test_uniform_block_shape()
   // Test shape 'UniformAllDims' with uniform 'max_coeff count'.
   DSizes<Index, 5> dims(11, 5, 6, 17, 7);
   const Index max_coeff_count = 5 * 5 * 5 * 5 * 5;
-  TensorBlockMapper
-      block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                          max_coeff_count});
+  TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                        max_coeff_count, zeroCost()});
   TensorBlock block = block_mapper.blockDescriptor(0);
   for (int i = 0; i < 5; ++i) {
     VERIFY_IS_EQUAL(5, block.dimensions()[i]);

@@ -248,9 +249,8 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 7 * 5 * 5 * 5 * 5;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {

@@ -260,9 +260,8 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 5 * 5 * 5 * 5 * 6;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(6, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {

@@ -276,9 +275,8 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 5 * 5 * 5;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {

@@ -288,9 +286,8 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 5 * 5 * 5 * 5 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {

@@ -304,9 +301,8 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const Index max_coeff_count = 7 * 5 * 6 * 7 * 5;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[0]);
     VERIFY_IS_EQUAL(5, block.dimensions()[1]);

@@ -317,9 +313,8 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const Index max_coeff_count = 5 * 5 * 5 * 6 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     VERIFY_IS_EQUAL(6, block.dimensions()[3]);

@@ -333,9 +328,8 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const Index max_coeff_count = 7 * 5 * 6 * 17 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[0]);
     VERIFY_IS_EQUAL(5, block.dimensions()[1]);

@@ -346,9 +340,8 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const Index max_coeff_count = 7 * 5 * 6 * 9 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     VERIFY_IS_EQUAL(9, block.dimensions()[3]);
@@ -369,9 +362,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const Index max_coeff_count = 10 * 1 * 1 * 1 * 1;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(10, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {

@@ -381,9 +374,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 1 * 1 * 6;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(6, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {

@@ -396,9 +389,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 1 * 1 * 1 * 1;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {

@@ -408,9 +401,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 1 * 1 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {

@@ -424,9 +417,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 3 * 1 * 1 * 1;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     VERIFY_IS_EQUAL(3, block.dimensions()[1]);

@@ -437,9 +430,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 1 * 15 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     VERIFY_IS_EQUAL(15, block.dimensions()[3]);

@@ -454,9 +447,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 5 * 1 * 1;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     VERIFY_IS_EQUAL(5, block.dimensions()[1]);

@@ -468,9 +461,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 5 * 17 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     VERIFY_IS_EQUAL(17, block.dimensions()[3]);

@@ -485,9 +478,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     VERIFY_IS_EQUAL(5, block.dimensions()[1]);

@@ -498,9 +491,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     VERIFY_IS_EQUAL(17, block.dimensions()[3]);

@@ -524,7 +517,8 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
 
   DSizes<Index, 1> dims(0);
   for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
-    TensorBlockMapper block_mapper(dims, {block_shape, max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims, {block_shape, max_coeff_count, zeroCost()});
     VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
     VERIFY(block_mapper.blockTotalSize() >= 1);
   }

@@ -537,7 +531,8 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
     for (int dim2 = 0; dim2 < 3; ++dim2) {
       DSizes<Index, 2> dims(dim1, dim2);
       for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
-        TensorBlockMapper block_mapper(dims, {block_shape, max_coeff_count});
+        TensorBlockMapper block_mapper(
+            dims, {block_shape, max_coeff_count, zeroCost()});
         if (dim1 * dim2 == 0) {
           VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
         }

@@ -64,7 +64,8 @@ static TensorBlockParams<NumDims> SkewedInnerBlock(
   using BlockMapper = internal::TensorBlockMapper<NumDims, Layout, Index>;
   BlockMapper block_mapper(dims,
                            {internal::TensorBlockShapeType::kSkewedInnerDims,
-                            internal::random<size_t>(1, dims.TotalSize())});
+                            internal::random<size_t>(1, dims.TotalSize()),
+                            {0, 0, 0}});
 
   Index total_blocks = block_mapper.blockCount();
   Index block_index = internal::random<Index>(0, total_blocks - 1);

@@ -75,8 +75,8 @@ static void test_block_io_copy_data_from_source_to_target() {
   // Construct a tensor block mapper.
   using TensorBlockMapper =
       internal::TensorBlockMapper<NumDims, Layout, Index>;
-  TensorBlockMapper block_mapper(dims, {RandomBlockShape(),
-                                        RandomTargetBlockSize(dims)});
+  TensorBlockMapper block_mapper(
+      dims, {RandomBlockShape(), RandomTargetBlockSize(dims), {0, 0, 0}});
 
   // We will copy data from input to output through this buffer.
   Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());

@@ -146,8 +146,10 @@ static void test_block_io_copy_using_reordered_dimensions() {
   // NOTE: Tensor block mapper works with shuffled dimensions.
   using TensorBlockMapper =
       internal::TensorBlockMapper<NumDims, Layout, Index>;
-  TensorBlockMapper block_mapper(output_tensor_dims, {RandomBlockShape(),
-                                 RandomTargetBlockSize(output_tensor_dims)});
+  TensorBlockMapper block_mapper(output_tensor_dims,
+                                 {RandomBlockShape(),
+                                  RandomTargetBlockSize(output_tensor_dims),
+                                  {0, 0, 0}});
 
   // We will copy data from input to output through this buffer.
   Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());