Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-07-30 16:52:01 +08:00)

Commit dbca11e880 (parent c49f0d851a): Remove TensorBlock.h and old TensorBlock/BlockMapper
@@ -97,7 +97,6 @@ typedef unsigned __int64 uint64_t;
 #include "src/Tensor/TensorGlobalFunctions.h"

 #include "src/Tensor/TensorBase.h"
-#include "src/Tensor/TensorBlock.h"
 #include "src/Tensor/TensorBlockV2.h"

 #include "src/Tensor/TensorEvaluator.h"
@@ -116,20 +116,12 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
     RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess
   };

-  typedef typename internal::TensorBlock<
-      typename internal::remove_const<Scalar>::type, Index, NumDims, Layout>
-      TensorBlock;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

   typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlockV2
       RightTensorBlock;

-  typedef internal::TensorBlockAssignment<
-      Scalar, NumDims, typename RightTensorBlock::XprType, Index>
-      TensorBlockAssignment;
-
   //===--------------------------------------------------------------------===//

   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
@@ -1,305 +0,0 @@
The old block header (src/Tensor/TensorBlock.h, whose include is dropped above) is deleted in its entirety. The removed file:

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2018 Andy Davis <andydavis@google.com>
// Copyright (C) 2018 Eugene Zhulenev <ezhulenev@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H

namespace Eigen {
namespace internal {

namespace {

// Helper template to choose between ColMajor and RowMajor values.
template <int Layout>
struct cond;

template <>
struct cond<ColMajor> {
  template <typename T>
  EIGEN_STRONG_INLINE const T& operator()(const T& col, const T& /*row*/) const {
    return col;
  }
};

template <>
struct cond<RowMajor> {
  template <typename T>
  EIGEN_STRONG_INLINE const T& operator()(const T& /*col*/, const T& row) const {
    return row;
  }
};

}  // namespace

/**
 * \enum TensorBlockShapeType
 * \ingroup CXX11_Tensor_Module
 *
 * \brief Tensor block shape type.
 *
 * Tensor block shape type defines what are the shape preference for the blocks
 * extracted from the larger tensor.
 *
 * Example: We want to extract blocks of 100 elements from the large 100x100 tensor:
 *  - tensor: 100x100
 *  - target_block_size: 100
 *
 * TensorBlockShapeType:
 *  - kUniformAllDims: 100 blocks of size 10x10
 *  - kSkewedInnerDims: 100 blocks of size 100x1 (or 1x100 depending on a column
 *    or row major layout)
 */
enum TensorBlockShapeType {
  kUniformAllDims,
  kSkewedInnerDims
};

/**
 * \class TensorBlock
 * \ingroup CXX11_Tensor_Module
 *
 * \brief Tensor block class.
 *
 * This class represents a tensor block specified by the index of the
 * first block coefficient, and the size of the block in each dimension.
 */
template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
class TensorBlock {
 public:
  typedef DSizes<StorageIndex, NumDims> Dimensions;

  TensorBlock(const StorageIndex first_coeff_index, const Dimensions& block_sizes,
              const Dimensions& block_strides, const Dimensions& tensor_strides,
              Scalar* data)
      : m_first_coeff_index(first_coeff_index),
        m_block_sizes(block_sizes),
        m_block_strides(block_strides),
        m_tensor_strides(tensor_strides),
        m_data(data) {}

  StorageIndex first_coeff_index() const { return m_first_coeff_index; }
  const Dimensions& block_sizes() const { return m_block_sizes; }
  const Dimensions& block_strides() const { return m_block_strides; }
  const Dimensions& tensor_strides() const { return m_tensor_strides; }
  Scalar* data() { return m_data; }
  const Scalar* data() const { return m_data; }

 private:
  StorageIndex m_first_coeff_index;
  Dimensions m_block_sizes;
  Dimensions m_block_strides;
  Dimensions m_tensor_strides;
  Scalar* m_data;  // Not owned.
};

/**
 * \class TensorBlockMapper
 * \ingroup CXX11_Tensor_Module
 *
 * \brief Tensor block mapper class.
 *
 * This class is responsible for iterating over the blocks of a tensor.
 */
template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
class TensorBlockMapper {
 public:
  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
  typedef DSizes<StorageIndex, NumDims> Dimensions;

  TensorBlockMapper() {}
  TensorBlockMapper(const Dimensions& dims, const TensorBlockShapeType block_shape,
                    Index min_target_size)
      : m_dimensions(dims),
        m_block_dim_sizes(BlockDimensions(dims, block_shape,
                                          convert_index<StorageIndex>(min_target_size))) {
    // Calculate block counts by dimension and total block count.
    DSizes<StorageIndex, NumDims> block_count;
    for (Index i = 0; i < block_count.rank(); ++i) {
      block_count[i] = divup(m_dimensions[i], m_block_dim_sizes[i]);
    }
    m_total_block_count = array_prod(block_count);

    // Calculate block strides (used for enumerating blocks).
    if (NumDims > 0) {
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        m_block_strides[0] = 1;
        m_tensor_strides[0] = 1;
        for (int i = 1; i < NumDims; ++i) {
          m_block_strides[i] = m_block_strides[i - 1] * block_count[i - 1];
          m_tensor_strides[i] = m_tensor_strides[i - 1] * m_dimensions[i - 1];
        }
      } else {
        m_block_strides[NumDims - 1] = 1;
        m_tensor_strides[NumDims - 1] = 1;
        for (int i = NumDims - 2; i >= 0; --i) {
          m_block_strides[i] = m_block_strides[i + 1] * block_count[i + 1];
          m_tensor_strides[i] = m_tensor_strides[i + 1] * m_dimensions[i + 1];
        }
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
  GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
    StorageIndex first_coeff_index = 0;
    DSizes<StorageIndex, NumDims> coords;
    DSizes<StorageIndex, NumDims> sizes;
    DSizes<StorageIndex, NumDims> strides;
    if (NumDims > 0) {
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = NumDims - 1; i > 0; --i) {
          const StorageIndex idx = block_index / m_block_strides[i];
          coords[i] = idx * m_block_dim_sizes[i];
          sizes[i] = numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]);
          block_index -= idx * m_block_strides[i];
          first_coeff_index += coords[i] * m_tensor_strides[i];
        }
        coords[0] = block_index * m_block_dim_sizes[0];
        sizes[0] = numext::mini((m_dimensions[0] - coords[0]), m_block_dim_sizes[0]);
        first_coeff_index += coords[0] * m_tensor_strides[0];

        strides[0] = 1;
        for (int i = 1; i < NumDims; ++i) {
          strides[i] = strides[i - 1] * sizes[i - 1];
        }
      } else {
        for (int i = 0; i < NumDims - 1; ++i) {
          const StorageIndex idx = block_index / m_block_strides[i];
          coords[i] = idx * m_block_dim_sizes[i];
          sizes[i] = numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]);
          block_index -= idx * m_block_strides[i];
          first_coeff_index += coords[i] * m_tensor_strides[i];
        }
        coords[NumDims - 1] = block_index * m_block_dim_sizes[NumDims - 1];
        sizes[NumDims - 1] = numext::mini((m_dimensions[NumDims - 1] - coords[NumDims - 1]),
                                          m_block_dim_sizes[NumDims - 1]);
        first_coeff_index += coords[NumDims - 1] * m_tensor_strides[NumDims - 1];

        strides[NumDims - 1] = 1;
        for (int i = NumDims - 2; i >= 0; --i) {
          strides[i] = strides[i + 1] * sizes[i + 1];
        }
      }
    }

    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
    return m_total_block_count;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex block_dims_total_size() const {
    return m_block_dim_sizes.TotalSize();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& block_dim_sizes() const {
    return m_block_dim_sizes;
  }

 private:
  static Dimensions BlockDimensions(const Dimensions& tensor_dims,
                                    const TensorBlockShapeType block_shape,
                                    StorageIndex min_target_size) {
    min_target_size = numext::maxi<StorageIndex>(1, min_target_size);

    // If tensor fully fits into the target size, we'll treat it a single block.
    Dimensions block_dim_sizes = tensor_dims;

    if (tensor_dims.TotalSize() == 0) {
      // Corner case: one of the dimensions is zero. Logic below is too complex
      // to handle this case on a general basis, just use unit block size.
      // Note: we must not yield blocks with zero dimensions (recipe for
      // overflows/underflows, divisions by zero and NaNs later).
      for (int i = 0; i < NumDims; ++i) {
        block_dim_sizes[i] = 1;
      }
    } else if (block_dim_sizes.TotalSize() > min_target_size) {
      if (block_shape == kUniformAllDims) {
        // Tensor will not fit within 'min_target_size' budget: calculate tensor
        // block dimension sizes based on "square" dimension size target.
        const StorageIndex dim_size_target = convert_index<StorageIndex>(
            std::pow(static_cast<float>(min_target_size),
                     1.0f / static_cast<float>(block_dim_sizes.rank())));
        for (Index i = 0; i < block_dim_sizes.rank(); ++i) {
          // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
          // a multiple of the packet size. Note that reducing
          // 'block_dim_size' in this manner can increase the number of
          // blocks, and so will amplify any per-block overhead.
          block_dim_sizes[i] = numext::mini(dim_size_target, tensor_dims[i]);
        }
        // Add any un-allocated coefficients to inner dimension(s).
        StorageIndex total_size = block_dim_sizes.TotalSize();
        for (int i = 0; i < NumDims; ++i) {
          const int dim = cond<Layout>()(i, NumDims - i - 1);
          if (block_dim_sizes[dim] < tensor_dims[dim]) {
            const StorageIndex total_size_other_dims = total_size / block_dim_sizes[dim];
            const StorageIndex alloc_avail =
                divup<StorageIndex>(min_target_size, total_size_other_dims);
            if (alloc_avail == block_dim_sizes[dim]) {
              // Insufficient excess coefficients to allocate.
              break;
            }
            block_dim_sizes[dim] = numext::mini(tensor_dims[dim], alloc_avail);
            total_size = total_size_other_dims * block_dim_sizes[dim];
          }
        }
      } else if (block_shape == kSkewedInnerDims) {
        StorageIndex coeff_to_allocate = min_target_size;
        for (int i = 0; i < NumDims; ++i) {
          const int dim = cond<Layout>()(i, NumDims - i - 1);
          block_dim_sizes[dim] = numext::mini(coeff_to_allocate, tensor_dims[dim]);
          coeff_to_allocate = divup(
              coeff_to_allocate,
              numext::maxi(static_cast<StorageIndex>(1), block_dim_sizes[dim]));
        }
        eigen_assert(coeff_to_allocate == 1);
      } else {
        eigen_assert(false);  // someone added new block shape type
      }
    }

    eigen_assert(block_dim_sizes.TotalSize() >=
                 numext::mini<Index>(min_target_size, tensor_dims.TotalSize()));

    return block_dim_sizes;
  }

  Dimensions m_dimensions;
  Dimensions m_block_dim_sizes;
  Dimensions m_block_strides;
  Dimensions m_tensor_strides;
  StorageIndex m_total_block_count;
};

}  // namespace internal
}  // namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
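For reference, here is a worked example of the two shape heuristics documented in the deleted header above (and re-implemented in TensorBlockV2Mapper below). This is a minimal standalone sketch of the arithmetic only; it is not Eigen code, and all names in it are illustrative.

#include <algorithm>
#include <cmath>
#include <cstdio>

// Rank-2 tensor of 100x100 coefficients, target_block_size = 100
// (dims[0] plays the role of the inner dimension, as in ColMajor layout).
int main() {
  const long dims[2] = {100, 100};
  const long target = 100;

  // kUniformAllDims: aim for a "square" block, roughly target^(1/rank) per dim.
  long uniform[2];
  const long side = static_cast<long>(std::pow(static_cast<float>(target), 1.0f / 2.0f));
  for (int i = 0; i < 2; ++i) uniform[i] = std::min(side, dims[i]);
  std::printf("uniform: %ldx%ld\n", uniform[0], uniform[1]);  // 10x10 -> 100 blocks

  // kSkewedInnerDims: give the whole budget to the innermost dimension first.
  long skewed[2];
  long coeff_to_allocate = target;
  for (int i = 0; i < 2; ++i) {
    skewed[i] = std::min(coeff_to_allocate, dims[i]);
    coeff_to_allocate = (coeff_to_allocate + skewed[i] - 1) / skewed[i];  // divup
  }
  std::printf("skewed: %ldx%ld\n", skewed[0], skewed[1]);  // 100x1 -> 100 blocks
  return 0;
}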
@@ -76,12 +76,6 @@ struct TensorBlockV2ResourceRequirements {
   TensorBlockV2ShapeType shape_type;
   size_t size;

-  TensorBlockShapeType shapeV1() const {
-    return shape_type == TensorBlockV2ShapeType::kUniformAllDims
-               ? internal::kUniformAllDims
-               : internal::kSkewedInnerDims;
-  }
-
   EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements
   merge(const TensorBlockV2ResourceRequirements &lhs,
@@ -274,6 +268,168 @@ class TensorBlockDescriptor {
   DestinationBuffer m_destination;
 };

+// -------------------------------------------------------------------------- //
+// TensorBlockMapper is responsible for iterating over the blocks of a tensor.
+
+template <int NumDims, int Layout, typename IndexType = Eigen::Index>
+class TensorBlockV2Mapper {
+  typedef TensorBlockDescriptor<NumDims, IndexType> BlockDescriptor;
+
+ public:
+  typedef DSizes<IndexType, NumDims> Dimensions;
+
+  TensorBlockV2Mapper() = default;
+  TensorBlockV2Mapper(const DSizes<IndexType, NumDims>& dimensions,
+                      const TensorBlockV2ResourceRequirements& requirements)
+      : m_tensor_dimensions(dimensions), m_requirements(requirements) {
+    // Initialize `m_block_dimensions`.
+    InitializeBlockDimensions();
+
+    // Calculate block counts by dimension and total block count.
+    DSizes<IndexType, NumDims> block_count;
+    for (int i = 0; i < NumDims; ++i) {
+      block_count[i] = divup(m_tensor_dimensions[i], m_block_dimensions[i]);
+    }
+    m_total_block_count = array_prod(block_count);
+
+    // Calculate block strides (used for enumerating blocks).
+    m_tensor_strides = strides<Layout>(m_tensor_dimensions);
+    m_block_strides = strides<Layout>(block_count);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockCount() const {
+    return m_total_block_count;
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockTotalSize() const {
+    return m_block_dimensions.TotalSize();
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DSizes<IndexType, NumDims>&
+  blockDimensions() const {
+    return m_block_dimensions;
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  BlockDescriptor blockDescriptor(IndexType block_index) const {
+    static const bool isColMajor = Layout == static_cast<int>(ColMajor);
+
+    IndexType offset = 0;
+    DSizes<IndexType, NumDims> dimensions;
+
+    if (NumDims == 0) return BlockDescriptor(offset, dimensions);
+
+    // Iterate outer -> inner dimensions.
+    for (int i = NumDims - 1; i >= 0; --i) {
+      const int dim = isColMajor ? i : NumDims - i - 1;
+
+      const IndexType idx = block_index / m_block_strides[dim];
+      block_index -= idx * m_block_strides[dim];
+
+      const IndexType coord = idx * m_block_dimensions[dim];
+      dimensions[dim] = numext::mini(m_tensor_dimensions[dim] - coord,
+                                     m_block_dimensions[dim]);
+      offset += coord * m_tensor_strides[dim];
+    }
+
+    return {offset, dimensions};
+  }
+
+ private:
+  void InitializeBlockDimensions() {
+    // Requested block shape and size.
+    const TensorBlockV2ShapeType shape_type = m_requirements.shape_type;
+    const IndexType target_block_size =
+        numext::maxi<IndexType>(1, static_cast<IndexType>(m_requirements.size));
+
+    // Corner case: one of the dimensions is zero. Logic below is too complex
+    // to handle this case on a general basis, just use unit block size.
+    // Note: we must not yield blocks with zero dimensions (recipe for
+    // overflows/underflows, divisions by zero and NaNs later).
+    if (m_tensor_dimensions.TotalSize() == 0) {
+      for (int i = 0; i < NumDims; ++i) {
+        m_block_dimensions[i] = 1;
+      }
+      return;
+    }
+
+    // If tensor fits into a target block size, evaluate it as a single block.
+    if (m_tensor_dimensions.TotalSize() <= target_block_size) {
+      m_block_dimensions = m_tensor_dimensions;
+      return;
+    }
+
+    static const bool isColMajor = Layout == static_cast<int>(ColMajor);
+
+    // Block shape skewed towards inner dimension.
+    if (shape_type == TensorBlockV2ShapeType::kSkewedInnerDims) {
+      IndexType coeff_to_allocate = target_block_size;
+
+      for (int i = 0; i < NumDims; ++i) {
+        const int dim = isColMajor ? i : NumDims - i - 1;
+        m_block_dimensions[dim] =
+            numext::mini(coeff_to_allocate, m_tensor_dimensions[dim]);
+        coeff_to_allocate = divup(
+            coeff_to_allocate,
+            numext::maxi(static_cast<IndexType>(1), m_block_dimensions[dim]));
+      }
+      eigen_assert(coeff_to_allocate == 1);
+
+    } else if (shape_type == TensorBlockV2ShapeType::kUniformAllDims) {
+      // Tensor will not fit within 'target_block_size' budget: calculate tensor
+      // block dimension sizes based on "square" dimension size target.
+      const IndexType dim_size_target = convert_index<IndexType>(
+          std::pow(static_cast<float>(target_block_size),
+                   1.0f / static_cast<float>(m_block_dimensions.rank())));
+
+      for (int i = 0; i < NumDims; ++i) {
+        // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
+        // a multiple of the packet size. Note that reducing
+        // 'block_dim_size' in this manner can increase the number of
+        // blocks, and so will amplify any per-block overhead.
+        m_block_dimensions[i] =
+            numext::mini(dim_size_target, m_tensor_dimensions[i]);
+      }
+
+      // Add any un-allocated coefficients to inner dimension(s).
+      IndexType total_size = m_block_dimensions.TotalSize();
+      for (int i = 0; i < NumDims; ++i) {
+        const int dim = isColMajor ? i : NumDims - i - 1;
+
+        if (m_block_dimensions[dim] < m_tensor_dimensions[dim]) {
+          const IndexType total_size_other_dims =
+              total_size / m_block_dimensions[dim];
+          const IndexType alloc_avail =
+              divup<IndexType>(target_block_size, total_size_other_dims);
+          if (alloc_avail == m_block_dimensions[dim]) {
+            // Insufficient excess coefficients to allocate.
+            break;
+          }
+          m_block_dimensions[dim] =
+              numext::mini(m_tensor_dimensions[dim], alloc_avail);
+          total_size = total_size_other_dims * m_block_dimensions[dim];
+        }
+      }
+
+    } else {
+      eigen_assert(false);  // unknown block shape
+    }
+
+    eigen_assert(m_block_dimensions.TotalSize() >=
+                 numext::mini<IndexType>(target_block_size,
+                                         m_tensor_dimensions.TotalSize()));
+  }
+
+  DSizes<IndexType, NumDims> m_tensor_dimensions;
+  TensorBlockV2ResourceRequirements m_requirements;
+
+  DSizes<IndexType, NumDims> m_block_dimensions;
+  IndexType m_total_block_count;
+
+  DSizes<IndexType, NumDims> m_tensor_strides;
+  DSizes<IndexType, NumDims> m_block_strides;
+};

 // -------------------------------------------------------------------------- //
 // TensorBlockScratchAllocator is responsible for allocating temporary buffers
 // for block evaluation (output or input block materialization). Given that
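As a usage sketch of the mapper added above (mirroring how the executors later in this diff drive it: construct from tensor dimensions plus a TensorBlockV2ResourceRequirements, then walk blockCount() / blockDescriptor()). Whether this compiles verbatim depends on the Eigen internal headers at this exact commit; treat it as an illustration, not part of the change.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  using namespace Eigen;
  using namespace Eigen::internal;

  // 100x100 tensor, about 100 coefficients per block, skewed to the inner dim.
  DSizes<Index, 2> dims(100, 100);
  TensorBlockV2ResourceRequirements reqs = {TensorBlockV2ShapeType::kSkewedInnerDims, 100};
  TensorBlockV2Mapper<2, ColMajor> mapper(dims, reqs);

  std::cout << "blocks: " << mapper.blockCount() << "\n";  // expect 100 blocks of 100x1
  for (Index i = 0; i < mapper.blockCount(); ++i) {
    auto desc = mapper.blockDescriptor(i);  // TensorBlockDescriptor<2>
    std::cout << "offset " << desc.offset() << " size "
              << desc.dimensions()[0] << "x" << desc.dimensions()[1] << "\n";
  }
  return 0;
}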
@@ -447,13 +447,6 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
     RawAccess = false
   };

-  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
-
-  typedef internal::TensorBlock<ScalarNoConst, Index, NumInputDims, Layout>
-      InputTensorBlock;
-  typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
-      OutputTensorBlock;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
   //===--------------------------------------------------------------------===//
@@ -506,50 +499,6 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
       }
     }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
-      const OutputTensorBlock& output_block) {
-    // Calculate input block sizes.
-    const DSizes<Index, NumDims>& output_block_sizes = output_block.block_sizes();
-    const DSizes<Index, NumDims>& output_block_strides = output_block.block_strides();
-    const Index chip_dim = this->m_dim.actualDim();
-    DSizes<Index, NumInputDims> input_block_sizes;
-    DSizes<Index, NumInputDims> input_block_strides;
-    for (Index i = 0; i < NumInputDims; ++i) {
-      if (i < chip_dim) {
-        input_block_sizes[i] = output_block_sizes[i];
-        input_block_strides[i] = output_block_strides[i];
-      } else if (i > chip_dim) {
-        input_block_sizes[i] = output_block_sizes[i - 1];
-        input_block_strides[i] = output_block_strides[i - 1];
-      } else {
-        input_block_sizes[i] = 1;
-      }
-    }
-    // Fix up input_block_stride for chip dimension.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      if (chip_dim == 0) {
-        input_block_strides[chip_dim] = 1;
-      } else {
-        input_block_strides[chip_dim] =
-            input_block_strides[chip_dim - 1] * input_block_sizes[chip_dim - 1];
-      }
-    } else {
-      if (chip_dim == NumInputDims - 1) {
-        input_block_strides[chip_dim] = 1;
-      } else {
-        input_block_strides[chip_dim] =
-            input_block_strides[chip_dim + 1] * input_block_sizes[chip_dim + 1];
-      }
-    }
-    // Write input block.
-    this->m_impl.writeBlock(InputTensorBlock(
-        this->srcCoeff(output_block.first_coeff_index()), input_block_sizes,
-        input_block_strides, this->m_inputStrides,
-        const_cast<ScalarNoConst*>(output_block.data())));
-  }
-
   template <typename TensorBlockV2>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
       const TensorBlockDesc& desc, const TensorBlockV2& block) {
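The deleted writeBlock above maps a block of the chipped (rank NumDims) output back to the rank NumInputDims input by re-inserting the chipped dimension with size 1. A small standalone sketch of that index mapping, with hypothetical sizes (not Eigen code):

#include <cstdio>

int main() {
  const int kNumInputDims = 3;
  const int chip_dim = 1;                     // dimension removed by chipping
  const long output_block_sizes[2] = {4, 8};  // block in the chipped (2-D) output

  long input_block_sizes[kNumInputDims];
  for (int i = 0; i < kNumInputDims; ++i) {
    if (i < chip_dim)       input_block_sizes[i] = output_block_sizes[i];
    else if (i > chip_dim)  input_block_sizes[i] = output_block_sizes[i - 1];
    else                    input_block_sizes[i] = 1;  // the chipped dimension
  }
  std::printf("%ld %ld %ld\n", input_block_sizes[0], input_block_sizes[1],
              input_block_sizes[2]);  // prints: 4 1 8
  return 0;
}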
@@ -471,8 +471,6 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
   typedef StorageMemory<CoeffReturnType, Device> Storage;
   typedef typename Storage::Type EvaluatorPointerType;
   static const int NumDims = internal::array_size<Dimensions>::value;
-  typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
-      TensorBlock;

   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
@@ -593,11 +591,6 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
   static const int NumDims = internal::array_size<
       typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;

-  typedef internal::TensorBlock<
-      typename internal::remove_const<Scalar>::type, Index, NumDims,
-      TensorEvaluator<LeftArgType, Device>::Layout>
-      TensorBlock;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
@@ -172,9 +172,8 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
   EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(const Expression& expr,
                                       const DefaultDevice& device = DefaultDevice()) {
-    typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock;
-    typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
-    typedef typename TensorBlock::Dimensions TensorBlockDimensions;
+    typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, StorageIndex>
+        TensorBlockMapper;

     typedef internal::TensorBlockDescriptor<NumDims, StorageIndex>
         TensorBlockDesc;
@@ -192,17 +191,15 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
         evaluator.getResourceRequirements();

     const TensorBlockMapper block_mapper(
-        TensorBlockDimensions(evaluator.dimensions()), requirements.shapeV1(),
-        requirements.size);
+        typename TensorBlockDesc::Dimensions(evaluator.dimensions()),
+        requirements);

     // Share scratch memory allocator between all blocks.
     TensorBlockScratch scratch(device);

-    const StorageIndex total_block_count = block_mapper.total_block_count();
+    const StorageIndex total_block_count = block_mapper.blockCount();
     for (StorageIndex i = 0; i < total_block_count; ++i) {
-      TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
-      TensorBlockDesc desc(block.first_coeff_index(), block.block_sizes());
+      TensorBlockDesc desc = block_mapper.blockDescriptor(i);
       evaluator.evalBlockV2(desc, scratch);
       scratch.reset();
     }
@@ -226,8 +223,6 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,

 template <typename TensorBlockMapper>
 struct TensorExecutorTilingContext {
-  typedef typename TensorBlockMapper::Block TensorBlock;
-
   TensorExecutorTilingContext() : buffer(nullptr) {}
   TensorExecutorTilingContext(const TensorBlockMapper& b_mapper,
                               const TensorOpCost& b_cost, void* b_buffer,
@@ -274,9 +269,9 @@ TensorExecutorTilingContext<TensorBlockMapper> GetTensorExecutorTilingContext(

   TensorBlockMapper block_mapper(
       typename TensorBlockMapper::Dimensions(evaluator.dimensions()),
-      requirements.shapeV1(), block_size);
+      requirements);

-  block_size = block_mapper.block_dims_total_size();
+  block_size = block_mapper.blockTotalSize();
   const size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1);
   const size_t aligned_blocksize =
       align *
@@ -382,9 +377,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
   static const int NumDims = traits<Expression>::NumDimensions;

   typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
-  typedef TensorBlockMapper<ScalarNoConst, IndexType, NumDims,
-                            Evaluator::Layout>
-      BlockMapper;
+  typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
   typedef TensorExecutorTilingContext<BlockMapper> TilingContext;

   typedef internal::TensorBlockDescriptor<NumDims, IndexType>
@@ -408,14 +401,13 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
       TensorBlockScratch scratch(device);

       for (IndexType block_idx = firstBlockIdx; block_idx < lastBlockIdx; ++block_idx) {
-        auto block = tiling.block_mapper.GetBlockForIndex(block_idx, nullptr);
-        TensorBlockDesc desc(block.first_coeff_index(), block.block_sizes());
+        TensorBlockDesc desc = tiling.block_mapper.blockDescriptor(block_idx);
         evaluator.evalBlockV2(desc, scratch);
         scratch.reset();
       }
     };

-    device.parallelFor(tiling.block_mapper.total_block_count(), tiling.cost,
+    device.parallelFor(tiling.block_mapper.blockCount(), tiling.cost,
                        eval_block);
   }
   evaluator.cleanup();
@@ -486,9 +478,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
   static const int NumDims = traits<Expression>::NumDimensions;

   typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
-  typedef TensorBlockMapper<ScalarNoConst, IndexType, NumDims,
-                            Evaluator::Layout>
-      BlockMapper;
+  typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
   typedef TensorExecutorTilingContext<BlockMapper> TilingContext;

   typedef internal::TensorBlockDescriptor<NumDims, IndexType> TensorBlockDesc;
@@ -518,14 +508,13 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,

       for (IndexType block_idx = firstBlockIdx; block_idx < lastBlockIdx;
            ++block_idx) {
-        auto block =
-            ctx->tiling.block_mapper.GetBlockForIndex(block_idx, nullptr);
-        TensorBlockDesc desc(block.first_coeff_index(), block.block_sizes());
+        TensorBlockDesc desc =
+            ctx->tiling.block_mapper.blockDescriptor(block_idx);
         ctx->evaluator.evalBlockV2(desc, scratch);
         scratch.reset();
       }
     };
-    ctx->device.parallelForAsync(ctx->tiling.block_mapper.total_block_count(),
+    ctx->device.parallelForAsync(ctx->tiling.block_mapper.blockCount(),
                                  ctx->tiling.cost, eval_block, [ctx]() { delete ctx; });
   };

@@ -102,9 +102,6 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>

   typedef internal::TensorIntDivisor<Index> IndexDivisor;

-  typedef internal::TensorBlock<CoeffReturnType, Index, NumDims, Layout>
-      TensorBlock;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
@@ -238,9 +238,6 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
     RawAccess = false
   };

-  typedef internal::TensorBlock<Scalar, Index, NumDims, Layout>
-      OutputTensorBlock;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockNotImplemented TensorBlockV2;
   //===--------------------------------------------------------------------===//
@@ -465,9 +465,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>

   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

-  typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout> TensorBlock;
-  typedef typename TensorBlock::Dimensions TensorBlockDimensions;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
@@ -757,9 +754,6 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>

   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

-  typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout> TensorBlock;
-  typedef typename TensorBlock::Dimensions TensorBlockDimensions;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
@@ -829,14 +823,6 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
       }
     }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
-      const TensorBlock& block) {
-    this->m_impl.writeBlock(TensorBlock(
-        this->srcCoeff(block.first_coeff_index()), block.block_sizes(),
-        block.block_strides(), TensorBlockDimensions(this->m_inputStrides),
-        const_cast<ScalarNoConst*>(block.data())));
-  }
-
   template<typename TensorBlockV2>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
       const TensorBlockDesc& desc, const TensorBlockV2& block) {
@@ -124,10 +124,6 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>

   typedef internal::TensorIntDivisor<Index> IndexDivisor;

-  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
-  typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
-      OutputTensorBlock;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
@@ -252,9 +248,8 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>
   internal::TensorBlockV2ResourceRequirements getResourceRequirements() const {
     const size_t target_block_size =
         numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-    return internal::TensorBlockV2ResourceRequirements::merge(
-        {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size},
-        m_impl.getResourceRequirements());
+    return {internal::TensorBlockV2ShapeType::kSkewedInnerDims,
+            target_block_size};
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
@@ -19,6 +19,7 @@ using Eigen::Tensor;
 using Eigen::Index;
 using Eigen::RowMajor;
 using Eigen::ColMajor;
+using Eigen::internal::TensorBlockV2ShapeType;


 template<typename T>
@@ -26,15 +27,15 @@ static const T& choose(int layout, const T& col, const T& row) {
   return layout == ColMajor ? col : row;
 }

-static internal::TensorBlockShapeType RandomShape() {
+static TensorBlockV2ShapeType RandomShape() {
   return internal::random<bool>()
-             ? internal::kUniformAllDims
-             : internal::kSkewedInnerDims;
+             ? TensorBlockV2ShapeType::kUniformAllDims
+             : TensorBlockV2ShapeType::kSkewedInnerDims;
 }

 template <int NumDims>
-static Index RandomTargetSize(const DSizes<Index, NumDims>& dims) {
-  return internal::random<Index>(1, dims.TotalSize());
+static size_t RandomTargetSize(const DSizes<Index, NumDims>& dims) {
+  return internal::random<size_t>(1, dims.TotalSize());
 }

 template <int NumDims>
@@ -66,55 +67,43 @@ static void Debug(DSizes<Index, NumDims> dims) {
 template <int Layout>
 static void test_block_mapper_sanity()
 {
-  typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
+  typedef internal::TensorBlockV2Mapper<2, Layout> TensorBlockMapper;

   DSizes<Index, 2> tensor_dims(100, 100);

   // Test uniform blocks.
   TensorBlockMapper uniform_block_mapper(
-      tensor_dims, internal::kUniformAllDims, 100);
+      tensor_dims, {TensorBlockV2ShapeType::kUniformAllDims, 100});

-  VERIFY_IS_EQUAL(uniform_block_mapper.total_block_count(), 100);
-  VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100);
+  VERIFY_IS_EQUAL(uniform_block_mapper.blockCount(), 100);
+  VERIFY_IS_EQUAL(uniform_block_mapper.blockTotalSize(), 100);

   // 10x10 blocks
-  typename TensorBlockMapper::Block uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, NULL);
-  VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10);
-  VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10);
-  // Depending on a layout we stride by cols rows.
-  VERIFY_IS_EQUAL(uniform_b0.block_strides().at(0), choose(Layout, 1, 10));
-  VERIFY_IS_EQUAL(uniform_b0.block_strides().at(1), choose(Layout, 10, 1));
-  // Tensor strides depend only on a layout and not on the block size.
-  VERIFY_IS_EQUAL(uniform_b0.tensor_strides().at(0), choose(Layout, 1, 100));
-  VERIFY_IS_EQUAL(uniform_b0.tensor_strides().at(1), choose(Layout, 100, 1));
+  auto uniform_b0 = uniform_block_mapper.blockDescriptor(0);
+  VERIFY_IS_EQUAL(uniform_b0.dimensions().at(0), 10);
+  VERIFY_IS_EQUAL(uniform_b0.dimensions().at(1), 10);

   // Test skewed to inner dims blocks.
   TensorBlockMapper skewed_block_mapper(
-      tensor_dims, internal::kSkewedInnerDims, 100);
+      tensor_dims, {TensorBlockV2ShapeType::kSkewedInnerDims, 100});

-  VERIFY_IS_EQUAL(skewed_block_mapper.total_block_count(), 100);
-  VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100);
+  VERIFY_IS_EQUAL(skewed_block_mapper.blockCount(), 100);
+  VERIFY_IS_EQUAL(skewed_block_mapper.blockTotalSize(), 100);

   // 1x100 (100x1) rows/cols depending on a tensor layout.
-  typename TensorBlockMapper::Block skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, NULL);
-  VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1));
-  VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100));
-  // Depending on a layout we stride by cols rows.
-  VERIFY_IS_EQUAL(skewed_b0.block_strides().at(0), choose(Layout, 1, 100));
-  VERIFY_IS_EQUAL(skewed_b0.block_strides().at(1), choose(Layout, 100, 1));
-  // Tensor strides depend only on a layout and not on the block size.
-  VERIFY_IS_EQUAL(skewed_b0.tensor_strides().at(0), choose(Layout, 1, 100));
-  VERIFY_IS_EQUAL(skewed_b0.tensor_strides().at(1), choose(Layout, 100, 1));
+  auto skewed_b0 = skewed_block_mapper.blockDescriptor(0);
+  VERIFY_IS_EQUAL(skewed_b0.dimensions().at(0), choose(Layout, 100, 1));
+  VERIFY_IS_EQUAL(skewed_b0.dimensions().at(1), choose(Layout, 1, 100));
 }

 // Given a TensorBlock "visit" every element accessible though it, and a keep an
 // index in the visited set. Verify that every coeff accessed only once.
-template <typename T, int Layout, int NumDims>
+template<int NumDims, int Layout>
 static void UpdateCoeffSet(
-    const internal::TensorBlock<T, Index, NumDims, Layout>& block,
+    const DSizes<Index, NumDims>& tensor_strides,
+    const internal::TensorBlockDescriptor<NumDims>& block,
     Index first_coeff_index, int dim_index, std::set<Index>* visited_coeffs) {
-  const DSizes<Index, NumDims>& block_sizes = block.block_sizes();
-  const DSizes<Index, NumDims>& tensor_strides = block.tensor_strides();
+  const DSizes<Index, NumDims>& block_sizes = block.dimensions();

   for (int i = 0; i < block_sizes[dim_index]; ++i) {
     if (tensor_strides[dim_index] == 1) {
@@ -123,7 +112,7 @@ static void UpdateCoeffSet(
       VERIFY_IS_EQUAL(inserted.second, true);
     } else {
       int next_dim_index = dim_index + choose(Layout, -1, 1);
-      UpdateCoeffSet<T, Layout, NumDims>(block, first_coeff_index,
+      UpdateCoeffSet<NumDims, Layout>(tensor_strides, block, first_coeff_index,
                                          next_dim_index, visited_coeffs);
       first_coeff_index += tensor_strides[dim_index];
     }
@@ -132,22 +121,22 @@ static void UpdateCoeffSet(

 template <typename T, int NumDims, int Layout>
 static void test_block_mapper_maps_every_element() {
-  typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
-  typedef internal::TensorBlockMapper<T, Index, NumDims, Layout> TensorBlockMapper;
+  typedef internal::TensorBlockV2Mapper<NumDims, Layout> TensorBlockMapper;

   DSizes<Index, NumDims> dims = RandomDims<NumDims>();
+  DSizes<Index, NumDims> strides = internal::strides<Layout>(dims);

   // Keep track of elements indices available via block access.
   std::set<Index> coeff_set;

   // Try different combinations of block types and sizes.
-  TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims));
+  TensorBlockMapper block_mapper(dims, {RandomShape(), RandomTargetSize(dims)});

-  for (int i = 0; i < block_mapper.total_block_count(); ++i) {
-    TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
-    UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
+  for (int i = 0; i < block_mapper.blockCount(); ++i) {
+    auto block = block_mapper.blockDescriptor(i);
+    UpdateCoeffSet<NumDims, Layout>(strides, block, block.offset(),
                                        choose(Layout, NumDims - 1, 0),
                                        &coeff_set);
   }

   // Verify that every coefficient in the original Tensor is accessible through
@@ -237,20 +226,21 @@ public:
 template <int Layout>
 static void test_uniform_block_shape()
 {
-  typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
-  typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
+  typedef internal::TensorBlockDescriptor<5> TensorBlock;
+  typedef internal::TensorBlockV2Mapper<5, Layout> TensorBlockMapper;

   {
     // Test shape 'UniformAllDims' with uniform 'max_coeff count'.
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 5 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
+    TensorBlockMapper block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
+                                          max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
     for (int i = 0; i < 5; ++i) {
-      VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(5, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   }

   // Test shape 'UniformAllDims' with larger 'max_coeff count' which spills
@@ -258,25 +248,27 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 7 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
+    TensorBlockMapper block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
+                                          max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(7, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {
-      VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(5, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 5 * 5 * 5 * 5 * 6;
-    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
+    TensorBlockMapper block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
+                                          max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(6, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {
-      VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(5, block.dimensions()[i]);
    }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   }

   // Test shape 'UniformAllDims' with larger 'max_coeff count' which spills
@@ -284,25 +276,27 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
+    TensorBlockMapper block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
+                                          max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {
-      VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(5, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 5 * 5 * 5 * 5 * 7;
-    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
+    TensorBlockMapper block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
+                                          max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {
-      VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(5, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   }

   // Test shape 'UniformAllDims' with larger 'max_coeff count' which spills
@@ -310,111 +304,119 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const Index max_coeff_count = 7 * 5 * 6 * 7 * 5;
-    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
-    VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[3]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[4]);
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(7, block.dimensions()[0]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[1]);
+    VERIFY_IS_EQUAL(6, block.dimensions()[2]);
+    VERIFY_IS_EQUAL(7, block.dimensions()[3]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[4]);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const Index max_coeff_count = 5 * 5 * 5 * 6 * 7;
-    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
-    VERIFY_IS_EQUAL(6, block.block_sizes()[3]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[0]);
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(7, block.dimensions()[4]);
+    VERIFY_IS_EQUAL(6, block.dimensions()[3]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[2]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[1]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[0]);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   }

   // Test shape 'UniformAllDims' with full allocation to all dims.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const Index max_coeff_count = 7 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
-    VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
-    VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(7, block.dimensions()[0]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[1]);
+    VERIFY_IS_EQUAL(6, block.dimensions()[2]);
+    VERIFY_IS_EQUAL(17, block.dimensions()[3]);
+    VERIFY_IS_EQUAL(7, block.dimensions()[4]);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const Index max_coeff_count = 7 * 5 * 6 * 9 * 7;
-    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
-    VERIFY_IS_EQUAL(9, block.block_sizes()[3]);
-    VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(7, block.dimensions()[4]);
+    VERIFY_IS_EQUAL(9, block.dimensions()[3]);
+    VERIFY_IS_EQUAL(6, block.dimensions()[2]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[1]);
+    VERIFY_IS_EQUAL(7, block.dimensions()[0]);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   }
 }

 template <int Layout>
 static void test_skewed_inner_dim_block_shape()
 {
-  typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
-  typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
+  typedef internal::TensorBlockDescriptor<5> TensorBlock;
+  typedef internal::TensorBlockV2Mapper<5, Layout> TensorBlockMapper;

   // Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 10 * 1 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(10, block.block_sizes()[0]);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(10, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {
-      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(1, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 1 * 1 * 6;
-    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(6, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {
-      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(1, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   }

   // Test shape 'SkewedInnerDims' with full allocation to inner-most dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 1 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {
-      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(1, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 1 * 1 * 7;
-    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {
-      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(1, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   }

   // Test shape 'SkewedInnerDims' with full allocation to inner-most dim,
@@ -422,27 +424,29 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 3 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
-    VERIFY_IS_EQUAL(3, block.block_sizes()[1]);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(11, block.dimensions()[0]);
+    VERIFY_IS_EQUAL(3, block.dimensions()[1]);
     for (int i = 2; i < 5; ++i) {
-      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(1, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 1 * 15 * 7;
-    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
-    VERIFY_IS_EQUAL(15, block.block_sizes()[3]);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(7, block.dimensions()[4]);
+    VERIFY_IS_EQUAL(15, block.dimensions()[3]);
     for (int i = 2; i >= 0; --i) {
-      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(1, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   }

   // Test shape 'SkewedInnerDims' with full allocation to inner-most dim,
@@ -450,61 +454,65 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 5 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(11, block.dimensions()[0]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[1]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[2]);
     for (int i = 3; i < 5; ++i) {
-      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(1, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 5 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
-    VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(7, block.dimensions()[4]);
+    VERIFY_IS_EQUAL(17, block.dimensions()[3]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[2]);
     for (int i = 1; i >= 0; --i) {
-      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
+      VERIFY_IS_EQUAL(1, block.dimensions()[i]);
     }
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   }

   // Test shape 'SkewedInnerDims' with full allocation to all dims.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
-    VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
-    VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(11, block.dimensions()[0]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[1]);
+    VERIFY_IS_EQUAL(6, block.dimensions()[2]);
+    VERIFY_IS_EQUAL(17, block.dimensions()[3]);
+    VERIFY_IS_EQUAL(7, block.dimensions()[4]);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
-                                   max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
-    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
-    VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
-    VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
-    VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
-    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
-    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
+    TensorBlockMapper
+        block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims,
+                            max_coeff_count});
+    TensorBlock block = block_mapper.blockDescriptor(0);
+    VERIFY_IS_EQUAL(7, block.dimensions()[4]);
+    VERIFY_IS_EQUAL(17, block.dimensions()[3]);
+    VERIFY_IS_EQUAL(6, block.dimensions()[2]);
+    VERIFY_IS_EQUAL(5, block.dimensions()[1]);
+    VERIFY_IS_EQUAL(11, block.dimensions()[0]);
+    VERIFY(block.dimensions().TotalSize() <= max_coeff_count);
   }
 }

 template <int Layout>
-static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
+static void test_empty_dims(const internal::TensorBlockV2ShapeType block_shape)
 {
   // Test blocking of tensors with zero dimensions:
   // - we must not crash on asserts and divisions by zero
@@ -512,26 +520,28 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
   // (recipe for overflows/underflows, divisions by zero and NaNs later)
   // - total block count must be zero
   {
-    typedef internal::TensorBlockMapper<int, Index, 1, Layout> TensorBlockMapper;
+    typedef internal::TensorBlockV2Mapper<1, Layout> TensorBlockMapper;
+
     DSizes<Index, 1> dims(0);
-    for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
-      TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count);
-      VERIFY_IS_EQUAL(block_mapper.total_block_count(), 0);
-      VERIFY(block_mapper.block_dims_total_size() >= 1);
+    for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
+      TensorBlockMapper block_mapper(dims, {block_shape, max_coeff_count});
+      VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
+      VERIFY(block_mapper.blockTotalSize() >= 1);
     }
   }

   {
-    typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
+    typedef internal::TensorBlockV2Mapper<2, Layout> TensorBlockMapper;
+
     for (int dim1 = 0; dim1 < 3; ++dim1) {
       for (int dim2 = 0; dim2 < 3; ++dim2) {
         DSizes<Index, 2> dims(dim1, dim2);
-        for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
-          TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count);
+        for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
+          TensorBlockMapper block_mapper(dims, {block_shape, max_coeff_count});
           if (dim1 * dim2 == 0) {
-            VERIFY_IS_EQUAL(block_mapper.total_block_count(), 0);
+            VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
           }
-          VERIFY(block_mapper.block_dims_total_size() >= 1);
+          VERIFY(block_mapper.blockTotalSize() >= 1);
         }
       }
     }
@@ -563,8 +573,8 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
   TEST_LAYOUTS_AND_DIMS(float, test_block_mapper_maps_every_element);
   TEST_LAYOUTS(test_uniform_block_shape);
   TEST_LAYOUTS(test_skewed_inner_dim_block_shape);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kUniformAllDims);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kSkewedInnerDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockV2ShapeType::kUniformAllDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockV2ShapeType::kSkewedInnerDims);
 }

 #undef TEST_LAYOUTS
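
The pattern repeated throughout the updated test above reduces to the following use of the V2 mapper (a minimal sketch, not taken verbatim from the test; the dimensions, the ColMajor layout and the coefficient budget are illustrative, while the types and member functions are exactly the ones appearing in this diff):

#include <unsupported/Eigen/CXX11/Tensor>

using namespace Eigen;

static void block_mapper_sketch() {
  typedef internal::TensorBlockDescriptor<5> TensorBlock;
  typedef internal::TensorBlockV2Mapper<5, ColMajor> TensorBlockMapper;

  // Shape type and coefficient budget are packed into one braced
  // requirements argument instead of two positional arguments.
  DSizes<Index, 5> dims(11, 5, 6, 17, 7);
  const size_t max_coeff_count = 5 * 5 * 5 * 5 * 5;
  TensorBlockMapper block_mapper(
      dims, {internal::TensorBlockV2ShapeType::kUniformAllDims, max_coeff_count});

  for (Index i = 0; i < block_mapper.blockCount(); ++i) {
    // A descriptor is built from the block index alone; no scratch buffer
    // is passed, unlike the old GetBlockForIndex(i, data).
    TensorBlock block = block_mapper.blockDescriptor(i);
    Index offset = block.offset();                // first coefficient of the block
    DSizes<Index, 5> sizes = block.dimensions();  // per-dimension extents
    (void)offset;
    (void)sizes;
  }
}

Because the descriptor no longer carries a data pointer, the buffer argument disappears from every call site in these tests; block storage is handled separately from block shape.
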
@@ -61,21 +61,21 @@ static TensorBlockParams<NumDims> RandomBlock(DSizes<Index, NumDims> dims,
 template <int Layout, int NumDims>
 static TensorBlockParams<NumDims> SkewedInnerBlock(
     DSizes<Index, NumDims> dims) {
-  using BlockMapper = internal::TensorBlockMapper<int, Index, NumDims, Layout>;
+  using BlockMapper = internal::TensorBlockV2Mapper<NumDims, Layout, Index>;
   BlockMapper block_mapper(dims,
-                           internal::TensorBlockShapeType::kSkewedInnerDims,
-                           internal::random<Index>(1, dims.TotalSize()));
+                           {internal::TensorBlockV2ShapeType::kSkewedInnerDims,
+                            internal::random<size_t>(1, dims.TotalSize())});

-  Index total_blocks = block_mapper.total_block_count();
+  Index total_blocks = block_mapper.blockCount();
   Index block_index = internal::random<Index>(0, total_blocks - 1);
-  auto block = block_mapper.GetBlockForIndex(block_index, nullptr);
-  DSizes<Index, NumDims> sizes = block.block_sizes();
+  auto block = block_mapper.blockDescriptor(block_index);
+  DSizes<Index, NumDims> sizes = block.dimensions();

   auto strides = internal::strides<Layout>(dims);
   DSizes<Index, NumDims> offsets;

   // Compute offsets for the first block coefficient.
-  Index index = block.first_coeff_index();
+  Index index = block.offset();
   if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
     for (int i = NumDims - 1; i > 0; --i) {
       const Index idx = index / strides[i];
@@ -92,8 +92,7 @@ static TensorBlockParams<NumDims> SkewedInnerBlock(
     if (NumDims > 0) offsets[NumDims - 1] = index;
   }

-  auto desc = TensorBlockDescriptor<NumDims>(block.first_coeff_index(), sizes);
-  return {offsets, sizes, desc};
+  return {offsets, sizes, block};
 }

 template <int NumDims>
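
The column-major branch shown above delinearizes the descriptor's flat offset() into per-dimension offsets using the tensor strides; the row-major branch (elided between the two hunks) walks the dimensions in the opposite order. A standalone sketch of that computation follows; the helper name BlockOffsets is hypothetical and not part of the test, and the loop bodies assume the standard peel-off-the-largest-stride delinearization:

#include <unsupported/Eigen/CXX11/Tensor>

using namespace Eigen;

// Delinearize a block's first-coefficient index into per-dimension offsets.
// Assumes NumDims >= 1; strides come from internal::strides<Layout>(dims).
template <int Layout, int NumDims>
static DSizes<Index, NumDims> BlockOffsets(const DSizes<Index, NumDims>& dims,
                                           Index index) {
  auto strides = internal::strides<Layout>(dims);
  DSizes<Index, NumDims> offsets;
  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    // Outermost dimension has the largest stride; peel it off first.
    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / strides[i];
      index -= idx * strides[i];
      offsets[i] = idx;
    }
    offsets[0] = index;
  } else {
    // Row-major: the last dimension is innermost, so walk forward instead.
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index idx = index / strides[i];
      index -= idx * strides[i];
      offsets[i] = idx;
    }
    offsets[NumDims - 1] = index;
  }
  return offsets;
}
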
@@ -22,14 +22,15 @@ static DSizes<Index, NumDims> RandomDims(Index min, Index max) {
   return DSizes<Index, NumDims>(dims);
 }

-static internal::TensorBlockShapeType RandomBlockShape() {
-  return internal::random<bool>() ? internal::kUniformAllDims
-                                  : internal::kSkewedInnerDims;
+static internal::TensorBlockV2ShapeType RandomBlockShape() {
+  return internal::random<bool>()
+             ? internal::TensorBlockV2ShapeType::kUniformAllDims
+             : internal::TensorBlockV2ShapeType::kSkewedInnerDims;
 }

 template <int NumDims>
-static Index RandomTargetBlockSize(const DSizes<Index, NumDims>& dims) {
-  return internal::random<Index>(1, dims.TotalSize());
+static size_t RandomTargetBlockSize(const DSizes<Index, NumDims>& dims) {
+  return internal::random<size_t>(1, dims.TotalSize());
 }

 template <int Layout, int NumDims>
@@ -73,12 +74,12 @@ static void test_block_io_copy_data_from_source_to_target() {

   // Construct a tensor block mapper.
   using TensorBlockMapper =
-      internal::TensorBlockMapper<T, Index, NumDims, Layout>;
-  TensorBlockMapper block_mapper(dims, RandomBlockShape(),
-                                 RandomTargetBlockSize(dims));
+      internal::TensorBlockV2Mapper<NumDims, Layout, Index>;
+  TensorBlockMapper block_mapper(dims, {RandomBlockShape(),
+                                        RandomTargetBlockSize(dims)});

   // We will copy data from input to output through this buffer.
-  Tensor<T, NumDims, Layout> block(block_mapper.block_dim_sizes());
+  Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());

   // Precompute strides for TensorBlockIO::Copy.
   auto input_strides = internal::strides<Layout>(dims);
@@ -88,24 +89,23 @@ static void test_block_io_copy_data_from_source_to_target() {
   T* output_data = output.data();
   T* block_data = block.data();

-  for (int i = 0; i < block_mapper.total_block_count(); ++i) {
-    using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
-    TensorBlock blk = block_mapper.GetBlockForIndex(i, block_data);
+  for (int i = 0; i < block_mapper.blockCount(); ++i) {
+    auto desc = block_mapper.blockDescriptor(i);

-    auto blk_dims = blk.block_sizes();
+    auto blk_dims = desc.dimensions();
     auto blk_strides = internal::strides<Layout>(blk_dims);

     {
       // Read from input into a block buffer.
       IODst dst(blk_dims, blk_strides, block_data, 0);
-      IOSrc src(input_strides, input_data, blk.first_coeff_index());
+      IOSrc src(input_strides, input_data, desc.offset());

       TensorBlockIO::Copy(dst, src);
     }

     {
       // Write from block buffer to output.
-      IODst dst(blk_dims, output_strides, output_data, blk.first_coeff_index());
+      IODst dst(blk_dims, output_strides, output_data, desc.offset());
       IOSrc src(blk_strides, block_data, 0);

       TensorBlockIO::Copy(dst, src);
@@ -145,12 +145,12 @@ static void test_block_io_copy_using_reordered_dimensions() {
   // Construct a tensor block mapper.
   // NOTE: Tensor block mapper works with shuffled dimensions.
   using TensorBlockMapper =
-      internal::TensorBlockMapper<T, Index, NumDims, Layout>;
-  TensorBlockMapper block_mapper(output_tensor_dims, RandomBlockShape(),
-                                 RandomTargetBlockSize(output_tensor_dims));
+      internal::TensorBlockV2Mapper<NumDims, Layout, Index>;
+  TensorBlockMapper block_mapper(output_tensor_dims, {RandomBlockShape(),
+                                 RandomTargetBlockSize(output_tensor_dims)});

   // We will copy data from input to output through this buffer.
-  Tensor<T, NumDims, Layout> block(block_mapper.block_dim_sizes());
+  Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());

   // Precompute strides for TensorBlockIO::Copy.
   auto input_strides = internal::strides<Layout>(dims);
@@ -160,12 +160,11 @@ static void test_block_io_copy_using_reordered_dimensions() {
   T* output_data = output.data();
   T* block_data = block.data();

-  for (Index i = 0; i < block_mapper.total_block_count(); ++i) {
-    using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
-    TensorBlock blk = block_mapper.GetBlockForIndex(i, block_data);
+  for (Index i = 0; i < block_mapper.blockCount(); ++i) {
+    auto desc = block_mapper.blockDescriptor(i);

     const Index first_coeff_index = GetInputIndex<Layout, NumDims>(
-        blk.first_coeff_index(), output_to_input_dim_map, input_strides,
+        desc.offset(), output_to_input_dim_map, input_strides,
         output_strides);

     // NOTE: Block dimensions are in the same order as output dimensions.
@@ -174,7 +173,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
     using IODst = typename TensorBlockIO::Dst;
     using IOSrc = typename TensorBlockIO::Src;

-    auto blk_dims = blk.block_sizes();
+    auto blk_dims = desc.dimensions();
     auto blk_strides = internal::strides<Layout>(blk_dims);

     {
@@ -236,16 +235,13 @@ static void test_block_io_copy_using_reordered_dimensions_do_not_squeeze() {
   float* tensor_data = tensor.data();
   float* block_data = block.data();

-  typedef internal::TensorBlock<float, Index, 3, Layout> TensorBlock;
-  TensorBlock blk(0, block_dims, block_strides, tensor_strides, block_data);
-
   using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 3, Layout>;
   using IODst = typename TensorBlockIO::Dst;
   using IOSrc = typename TensorBlockIO::Src;

   // Read from a tensor into a block.
-  IODst dst(blk.block_sizes(), block_strides, block_data, 0);
-  IOSrc src(tensor_strides, tensor_data, blk.first_coeff_index());
+  IODst dst(block_dims, block_strides, block_data, 0);
+  IOSrc src(tensor_strides, tensor_data, 0);

   TensorBlockIO::Copy(dst, src, /*dst_to_src_dim_map=*/block_to_tensor_dim);

@@ -287,16 +283,13 @@ static void test_block_io_copy_using_reordered_dimensions_squeeze() {
   float* tensor_data = tensor.data();
   float* block_data = block.data();

-  typedef internal::TensorBlock<float, Index, 4, Layout> TensorBlock;
-  TensorBlock blk(0, block_dims, block_strides, tensor_strides, block_data);
-
   using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 4, Layout>;
   using IODst = typename TensorBlockIO::Dst;
   using IOSrc = typename TensorBlockIO::Src;

   // Read from a tensor into a block.
-  IODst dst(blk.block_sizes(), block_strides, block_data, 0);
-  IOSrc src(tensor_strides, tensor_data, blk.first_coeff_index());
+  IODst dst(block_dims, block_strides, block_data, 0);
+  IOSrc src(tensor_strides, tensor_data, 0);

   TensorBlockIO::Copy(dst, src, /*dst_to_src_dim_map=*/block_to_tensor_dim);

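
Taken together, the IO hunks above reduce to the following round trip with the V2 block IO (a minimal sketch; the 3-D float tensors, the ColMajor layout and the coefficient budget are illustrative, while TensorBlockV2Mapper, TensorBlockIOV2 and the Dst/Src/Copy calls are used exactly as in the updated tests):

#include <unsupported/Eigen/CXX11/Tensor>

using namespace Eigen;

static void block_io_round_trip_sketch() {
  Tensor<float, 3, ColMajor> input(7, 9, 11);
  Tensor<float, 3, ColMajor> output(7, 9, 11);
  input.setRandom();

  using TensorBlockMapper = internal::TensorBlockV2Mapper<3, ColMajor, Index>;
  TensorBlockMapper block_mapper(
      input.dimensions(),
      {internal::TensorBlockV2ShapeType::kSkewedInnerDims,
       /*illustrative coefficient budget*/ size_t(100)});

  // Scratch buffer sized for the largest block the mapper will hand out.
  Tensor<float, 3, ColMajor> block(block_mapper.blockDimensions());

  using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 3, ColMajor>;
  using IODst = TensorBlockIO::Dst;
  using IOSrc = TensorBlockIO::Src;

  auto input_strides = internal::strides<ColMajor>(input.dimensions());
  auto output_strides = internal::strides<ColMajor>(output.dimensions());

  for (Index i = 0; i < block_mapper.blockCount(); ++i) {
    auto desc = block_mapper.blockDescriptor(i);
    auto blk_dims = desc.dimensions();
    auto blk_strides = internal::strides<ColMajor>(blk_dims);

    // Read from the input tensor into the block buffer.
    IODst dst(blk_dims, blk_strides, block.data(), 0);
    IOSrc src(input_strides, input.data(), desc.offset());
    TensorBlockIO::Copy(dst, src);

    // Write the block buffer back out at the same offset in the output.
    IODst out_dst(blk_dims, output_strides, output.data(), desc.offset());
    IOSrc out_src(blk_strides, block.data(), 0);
    TensorBlockIO::Copy(out_dst, out_src);
  }
}

The only state shared between the two copies is the descriptor's offset and dimensions; the block buffer itself is plain tensor storage, which is what lets the old buffer-carrying TensorBlock type disappear from these tests.
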