TensorBlockIO

parent 34a75c3c5c
commit d55efa6f0f
@ -14,6 +14,32 @@

namespace Eigen {

namespace internal {

namespace {

// Helper template to choose between ColMajor and RowMajor values.
template <int Layout>
struct cond;

template <>
struct cond<ColMajor> {
  template <typename T>
  EIGEN_STRONG_INLINE const T& operator()(const T& col,
                                          const T& /*row*/) const {
    return col;
  }
};

template <>
struct cond<RowMajor> {
  template <typename T>
  EIGEN_STRONG_INLINE const T& operator()(const T& /*col*/,
                                          const T& row) const {
    return row;
  }
};

}  // namespace
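// Illustrative sketch (not part of this header): cond<Layout> picks its first
// argument for ColMajor and its second for RowMajor, so layout-dependent index
// math can be written once. For example, mapping "the i-th innermost
// dimension" to an actual dimension index for either storage order:
template <int Layout>
int NthInnermostDimSketch(int i, int num_dims) {
  // ColMajor: innermost dimension is 0, so return i.
  // RowMajor: innermost dimension is num_dims - 1, so count from the back.
  return cond<Layout>()(i, num_dims - i - 1);
}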
/**
 * \class TensorBlockShapeType
 * \ingroup CXX11_Tensor_Module
@ -82,6 +108,512 @@ class TensorBlock {

  Scalar* m_data;  // Not owned.
};

template <typename Scalar, typename Index, bool Vectorizable>
struct TensorBlockCopyOp {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
      const Index num_coeff_to_copy, const Index dst_index,
      const Index dst_stride, Scalar* EIGEN_RESTRICT dst_data,
      const Index src_index, const Index src_stride,
      const Scalar* EIGEN_RESTRICT src_data) {
    for (Index i = 0; i < num_coeff_to_copy; ++i) {
      dst_data[dst_index + i * dst_stride] =
          src_data[src_index + i * src_stride];
    }
  }
};
// NOTE: Benchmarks run on an implementation of this that broke each of the
// loops in these conditionals into its own template specialization (to
// avoid conditionals in the caller's loop) did not show an improvement.
template <typename Scalar, typename Index>
struct TensorBlockCopyOp<Scalar, Index, true> {
  typedef typename packet_traits<Scalar>::type Packet;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
      const Index num_coeff_to_copy, const Index dst_index,
      const Index dst_stride, Scalar* EIGEN_RESTRICT dst_data,
      const Index src_index, const Index src_stride,
      const Scalar* EIGEN_RESTRICT src_data) {
    if (src_stride == 1) {
      const Index packet_size = internal::unpacket_traits<Packet>::size;
      const Index vectorized_size =
          (num_coeff_to_copy / packet_size) * packet_size;
      if (dst_stride == 1) {
        // LINEAR
        for (Index i = 0; i < vectorized_size; i += packet_size) {
          Packet p = internal::ploadu<Packet>(src_data + src_index + i);
          internal::pstoreu<Scalar, Packet>(dst_data + dst_index + i, p);
        }
        for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
          dst_data[dst_index + i] = src_data[src_index + i];
        }
      } else {
        // SCATTER
        for (Index i = 0; i < vectorized_size; i += packet_size) {
          Packet p = internal::ploadu<Packet>(src_data + src_index + i);
          internal::pscatter<Scalar, Packet>(
              dst_data + dst_index + i * dst_stride, p, dst_stride);
        }
        for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
          dst_data[dst_index + i * dst_stride] = src_data[src_index + i];
        }
      }
    } else if (src_stride == 0) {
      const Index packet_size = internal::unpacket_traits<Packet>::size;
      const Index vectorized_size =
          (num_coeff_to_copy / packet_size) * packet_size;
      if (dst_stride == 1) {
        // LINEAR
        for (Index i = 0; i < vectorized_size; i += packet_size) {
          Packet p = internal::pload1<Packet>(src_data + src_index);
          internal::pstoreu<Scalar, Packet>(dst_data + dst_index + i, p);
        }
        for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
          dst_data[dst_index + i] = src_data[src_index];
        }
      } else {
        // SCATTER
        for (Index i = 0; i < vectorized_size; i += packet_size) {
          Packet p = internal::pload1<Packet>(src_data + src_index);
          internal::pscatter<Scalar, Packet>(
              dst_data + dst_index + i * dst_stride, p, dst_stride);
        }
        for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
          dst_data[dst_index + i * dst_stride] = src_data[src_index];
        }
      }
    } else {
      if (dst_stride == 1) {
        // GATHER
        const Index packet_size = internal::unpacket_traits<Packet>::size;
        const Index vectorized_size =
            (num_coeff_to_copy / packet_size) * packet_size;
        for (Index i = 0; i < vectorized_size; i += packet_size) {
          Packet p = internal::pgather<Scalar, Packet>(
              src_data + src_index + i * src_stride, src_stride);
          internal::pstoreu<Scalar, Packet>(dst_data + dst_index + i, p);
        }
        for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
          dst_data[dst_index + i] = src_data[src_index + i * src_stride];
        }
      } else {
        // RANDOM
        for (Index i = 0; i < num_coeff_to_copy; ++i) {
          dst_data[dst_index + i * dst_stride] =
              src_data[src_index + i * src_stride];
        }
      }
    }
  }
};
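// Illustrative sketch (not part of this header): TensorBlockCopyOp::Run is a
// strided copy, and the vectorized specialization above only picks a kernel
// per (src_stride, dst_stride) pair: (1,1) LINEAR, (1,n) SCATTER, (0,*)
// broadcast of a single value, (n,1) GATHER, and a scalar RANDOM loop
// otherwise. A scalar illustration of the GATHER case -- pulling one column
// out of a row-major 3x4 matrix into a contiguous buffer:
inline void GatherColumnSketch() {
  const int rows = 3, cols = 4;
  float matrix[rows * cols];
  for (int i = 0; i < rows * cols; ++i) matrix[i] = static_cast<float>(i);

  // Copy column 2: src_index = 2, src_stride = cols (row-major), dst_stride = 1.
  float column[rows];
  const int src_index = 2, src_stride = cols;
  const int dst_index = 0, dst_stride = 1;
  for (int i = 0; i < rows; ++i) {
    column[dst_index + i * dst_stride] = matrix[src_index + i * src_stride];
  }
  // column is now {2, 6, 10}.
}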
/**
 * \class TensorBlockIO
 * \ingroup CXX11_Tensor_Module
 *
 * \brief Tensor block IO class.
 *
 * This class is responsible for copying data between a tensor and a tensor
 * block.
 */
template <typename Scalar, typename Index, int NumDims, int Layout,
          bool Vectorizable, bool BlockRead>
class TensorBlockIO {
 public:
  typedef typename internal::TensorBlock<Scalar, Index, NumDims, Layout>
      TensorBlock;
  typedef typename internal::TensorBlockCopyOp<Scalar, Index, Vectorizable>
      TensorBlockCopyOp;

 protected:
  struct BlockIteratorState {
    Index input_stride;
    Index output_stride;
    Index input_span;
    Index output_span;
    Index size;
    Index count;
  };

  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy(
      const TensorBlock& block, Index first_coeff_index,
      const array<Index, NumDims>& tensor_to_block_dim_map,
      const array<Index, NumDims>& tensor_strides, const Scalar* src_data,
      Scalar* dst_data) {
    // Find the innermost tensor dimension whose size is not 1. This is the
    // effective inner dim. If all dimensions are of size 1, then fall back to
    // using the actual innermost dim to avoid out-of-bound access.
    Index num_size_one_inner_dims = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = cond<Layout>()(i, NumDims - i - 1);
      if (block.block_sizes()[tensor_to_block_dim_map[dim]] != 1) {
        num_size_one_inner_dims = i;
        break;
      }
    }
    // Calculate strides and dimensions.
    const Index tensor_stride1_dim = cond<Layout>()(
        num_size_one_inner_dims, NumDims - num_size_one_inner_dims - 1);
    const Index block_dim_for_tensor_stride1_dim =
        NumDims == 0 ? 1 : tensor_to_block_dim_map[tensor_stride1_dim];
    size_t block_inner_dim_size =
        NumDims == 0 ? 1
                     : block.block_sizes()[block_dim_for_tensor_stride1_dim];
    for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
      const int dim = cond<Layout>()(i, NumDims - i - 1);
      const Index block_stride =
          block.block_strides()[tensor_to_block_dim_map[dim]];
      if (block_inner_dim_size == block_stride &&
          block_stride == tensor_strides[dim]) {
        block_inner_dim_size *=
            block.block_sizes()[tensor_to_block_dim_map[dim]];
        ++num_size_one_inner_dims;
      } else {
        break;
      }
    }

    Index inputIndex;
    Index outputIndex;
    Index input_stride;
    Index output_stride;

    // Setup strides to read/write along the tensor's stride1 dimension.
    if (BlockRead) {
      inputIndex = first_coeff_index;
      outputIndex = 0;
      input_stride = NumDims == 0 ? 1 : tensor_strides[tensor_stride1_dim];
      output_stride =
          NumDims == 0
              ? 1
              : block.block_strides()[block_dim_for_tensor_stride1_dim];
    } else {
      inputIndex = 0;
      outputIndex = first_coeff_index;
      input_stride =
          NumDims == 0
              ? 1
              : block.block_strides()[block_dim_for_tensor_stride1_dim];
      output_stride = NumDims == 0 ? 1 : tensor_strides[tensor_stride1_dim];
    }

    const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
    array<BlockIteratorState, at_least_1_dim> block_iter_state;

    // Initialize block iterator state. Squeeze away any dimension of size 1.
    int num_squeezed_dims = 0;
    for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
      const int dim = cond<Layout>()(i + 1, NumDims - i - 2);
      const Index size = block.block_sizes()[tensor_to_block_dim_map[dim]];
      if (size == 1) {
        continue;
      }
      block_iter_state[num_squeezed_dims].size = size;
      if (BlockRead) {
        block_iter_state[num_squeezed_dims].input_stride = tensor_strides[dim];
        block_iter_state[num_squeezed_dims].output_stride =
            block.block_strides()[tensor_to_block_dim_map[dim]];
      } else {
        block_iter_state[num_squeezed_dims].input_stride =
            block.block_strides()[tensor_to_block_dim_map[dim]];
        block_iter_state[num_squeezed_dims].output_stride = tensor_strides[dim];
      }
      block_iter_state[num_squeezed_dims].input_span =
          block_iter_state[num_squeezed_dims].input_stride *
          (block_iter_state[num_squeezed_dims].size - 1);
      block_iter_state[num_squeezed_dims].output_span =
          block_iter_state[num_squeezed_dims].output_stride *
          (block_iter_state[num_squeezed_dims].size - 1);
      block_iter_state[num_squeezed_dims].count = 0;
      ++num_squeezed_dims;
    }

    // Iterate copying data from src to dst.
    const Index block_total_size =
        NumDims == 0 ? 1 : block.block_sizes().TotalSize();
    for (Index i = 0; i < block_total_size; i += block_inner_dim_size) {
      TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
                             dst_data, inputIndex, input_stride, src_data);
      // Update index.
      for (int j = 0; j < num_squeezed_dims; ++j) {
        if (++block_iter_state[j].count < block_iter_state[j].size) {
          inputIndex += block_iter_state[j].input_stride;
          outputIndex += block_iter_state[j].output_stride;
          break;
        }
        block_iter_state[j].count = 0;
        inputIndex -= block_iter_state[j].input_span;
        outputIndex -= block_iter_state[j].output_span;
      }
    }
  }
};
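// Illustrative sketch (not part of this header): the Copy() loop above
// advances inputIndex/outputIndex with an odometer-style counter over the
// squeezed (size > 1) dimensions: bump the innermost counter; on overflow
// reset it, undo that dimension's accumulated span, and carry into the next
// dimension. A minimal standalone version of the same pattern, with made-up
// sizes and strides:
inline void OdometerSketch(int* visited /* room for 6 entries */) {
  const int kDims = 2;
  const int size[kDims] = {3, 2};     // squeezed dimension sizes
  const int stride[kDims] = {4, 12};  // strides of those dimensions
  const int span[kDims] = {stride[0] * (size[0] - 1),
                           stride[1] * (size[1] - 1)};
  int count[kDims] = {0, 0};
  int index = 0;
  for (int i = 0; i < size[0] * size[1]; ++i) {
    visited[i] = index;  // visit order: 0, 4, 8, 12, 16, 20
    for (int j = 0; j < kDims; ++j) {
      if (++count[j] < size[j]) {
        index += stride[j];  // advance along dimension j
        break;
      }
      count[j] = 0;          // overflow: reset and carry into the next dim
      index -= span[j];
    }
  }
}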
/**
 * \class TensorBlockReader
 * \ingroup CXX11_Tensor_Module
 *
 * \brief Tensor block reader class.
 *
 * This class is responsible for reading a tensor block.
 *
 */
template <typename Scalar, typename Index, int NumDims, int Layout,
          bool Vectorizable>
class TensorBlockReader
    : public TensorBlockIO<Scalar, Index, NumDims, Layout, Vectorizable, true> {
 public:
  typedef typename internal::TensorBlock<Scalar, Index, NumDims, Layout>
      TensorBlock;
  typedef TensorBlockIO<Scalar, Index, NumDims, Layout, Vectorizable, true>
      Base;

  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
      TensorBlock* block, const Scalar* src_data) {
    array<Index, NumDims> tensor_to_block_dim_map;
    for (int i = 0; i < NumDims; ++i) {
      tensor_to_block_dim_map[i] = i;
    }
    Base::Copy(*block, block->first_coeff_index(), tensor_to_block_dim_map,
               block->tensor_strides(), src_data, block->data());
  }

  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
      TensorBlock* block, Index first_coeff_index,
      const array<Index, NumDims>& tensor_to_block_dim_map,
      const array<Index, NumDims>& tensor_strides, const Scalar* src_data) {
    Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map,
               tensor_strides, src_data, block->data());
  }
};

/**
 * \class TensorBlockWriter
 * \ingroup CXX11_Tensor_Module
 *
 * \brief Tensor block writer class.
 *
 * This class is responsible for writing a tensor block.
 *
 */
template <typename Scalar, typename Index, int NumDims, int Layout,
          bool Vectorizable>
class TensorBlockWriter : public TensorBlockIO<Scalar, Index, NumDims, Layout,
                                               Vectorizable, false> {
 public:
  typedef typename internal::TensorBlock<Scalar, Index, NumDims, Layout>
      TensorBlock;
  typedef TensorBlockIO<Scalar, Index, NumDims, Layout, Vectorizable, false>
      Base;

  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
      const TensorBlock& block, Scalar* dst_data) {
    array<Index, NumDims> tensor_to_block_dim_map;
    for (int i = 0; i < NumDims; ++i) {
      tensor_to_block_dim_map[i] = i;
    }
    Base::Copy(block, block.first_coeff_index(), tensor_to_block_dim_map,
               block.tensor_strides(), block.data(), dst_data);
  }

  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
      const TensorBlock& block, Index first_coeff_index,
      const array<Index, NumDims>& tensor_to_block_dim_map,
      const array<Index, NumDims>& tensor_strides, Scalar* dst_data) {
    Base::Copy(block, first_coeff_index, tensor_to_block_dim_map,
               tensor_strides, block.data(), dst_data);
  }
};
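// Usage sketch (not part of this header; modelled on the
// cxx11_tensor_block_access test further down, and assumes <vector> is
// available and that TensorBlockMapper, defined later in this header, is
// visible): split a rank-5 float tensor into blocks, read each block from
// `input` and write it back into `output`, which leaves `output` equal to
// `input`. Both buffers must hold dims.TotalSize() floats; the target size of
// 1024 coefficients per block is an arbitrary illustration value.
inline void CopyTensorBlockwiseSketch(const float* input, float* output) {
  typedef TensorBlockMapper<float, Index, 5, ColMajor> BlockMapper;
  typedef TensorBlockReader<float, Index, 5, ColMajor, /*Vectorizable=*/true>
      Reader;
  typedef TensorBlockWriter<float, Index, 5, ColMajor, /*Vectorizable=*/true>
      Writer;

  DSizes<Index, 5> dims(5, 7, 11, 17, 3);
  BlockMapper mapper(dims, TensorBlockShapeType::kUniformAllDims,
                     /*min_target_size=*/1024);

  std::vector<float, aligned_allocator<float> > scratch(
      mapper.block_dims_total_size());
  for (int i = 0; i < mapper.total_block_count(); ++i) {
    auto block = mapper.GetBlockForIndex(i, scratch.data());
    Reader::Run(&block, input);   // tensor -> block
    Writer::Run(block, output);   // block  -> tensor
  }
}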
/**
|
||||||
|
* \class TensorBlockCwiseBinaryOp
|
||||||
|
* \ingroup CXX11_Tensor_Module
|
||||||
|
*
|
||||||
|
* \brief Carries out a cwise binary op on a number of coefficients.
|
||||||
|
*
|
||||||
|
* This class reads strided inputs from left and right operands, and writes the
|
||||||
|
* result of the cwise binary op to the strided output array.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <bool Vectorizable>
|
||||||
|
struct TensorBlockCwiseBinaryOp {
|
||||||
|
template <typename Index, typename BinaryFunctor, typename OutputScalar,
|
||||||
|
typename LeftScalar, typename RightScalar>
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
|
||||||
|
const BinaryFunctor& functor, const Index num_coeff,
|
||||||
|
const Index output_index, const Index output_stride,
|
||||||
|
OutputScalar* output_data, const Index left_index,
|
||||||
|
const Index left_stride, const LeftScalar* left_data,
|
||||||
|
const Index right_index, const Index right_stride,
|
||||||
|
const RightScalar* right_data) {
|
||||||
|
for (Index i = 0; i < num_coeff; ++i) {
|
||||||
|
output_data[output_index + i * output_stride] =
|
||||||
|
functor(left_data[left_index + i * left_stride],
|
||||||
|
right_data[right_index + i * right_stride]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct TensorBlockCwiseBinaryOp<true> {
|
||||||
|
template <typename Index, typename BinaryFunctor, typename OutputScalar,
|
||||||
|
typename LeftScalar, typename RightScalar>
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
|
||||||
|
const BinaryFunctor& functor, const Index num_coeff,
|
||||||
|
const Index output_index, const Index output_stride,
|
||||||
|
OutputScalar* output_data, const Index left_index,
|
||||||
|
const Index left_stride, const LeftScalar* left_data,
|
||||||
|
const Index right_index, const Index right_stride,
|
||||||
|
const RightScalar* right_data) {
|
||||||
|
EIGEN_STATIC_ASSERT(functor_traits<BinaryFunctor>::PacketAccess,
|
||||||
|
YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||||
|
typedef typename packet_traits<OutputScalar>::type OutputPacket;
|
||||||
|
typedef typename packet_traits<LeftScalar>::type LeftPacket;
|
||||||
|
typedef typename packet_traits<RightScalar>::type RightPacket;
|
||||||
|
const Index packet_size = unpacket_traits<OutputPacket>::size;
|
||||||
|
EIGEN_STATIC_ASSERT(unpacket_traits<LeftPacket>::size == packet_size,
|
||||||
|
YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||||
|
EIGEN_STATIC_ASSERT(unpacket_traits<RightPacket>::size == packet_size,
|
||||||
|
YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||||
|
const Index vectorized_size = (num_coeff / packet_size) * packet_size;
|
||||||
|
if (output_stride != 1 || left_stride != 1 || right_stride != 1) {
|
||||||
|
TensorBlockCwiseBinaryOp<false>::Run(
|
||||||
|
functor, num_coeff, output_index, output_stride, output_data,
|
||||||
|
left_index, left_stride, left_data, right_index, right_stride,
|
||||||
|
right_data);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Vectorization for the most common case.
|
||||||
|
for (Index i = 0; i < vectorized_size; i += packet_size) {
|
||||||
|
LeftPacket l = internal::ploadu<LeftPacket>(left_data + left_index + i);
|
||||||
|
RightPacket r =
|
||||||
|
internal::ploadu<RightPacket>(right_data + right_index + i);
|
||||||
|
OutputPacket p = functor.packetOp(l, r);
|
||||||
|
internal::pstoreu<OutputScalar, OutputPacket>(
|
||||||
|
output_data + output_index + i, p);
|
||||||
|
}
|
||||||
|
for (Index i = vectorized_size; i < num_coeff; ++i) {
|
||||||
|
output_data[output_index + i] =
|
||||||
|
functor(left_data[left_index + i], right_data[right_index + i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \class TensorBlockCwiseBinaryIO
|
||||||
|
* \ingroup CXX11_Tensor_Module
|
||||||
|
*
|
||||||
|
* \brief Tensor block IO class for carrying out cwise binary ops.
|
||||||
|
*
|
||||||
|
* This class carries out the binary op on given blocks.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <typename BinaryFunctor, typename Index, typename OutputScalar,
|
||||||
|
int NumDims, int Layout>
|
||||||
|
struct TensorBlockCwiseBinaryIO {
|
||||||
|
typedef typename internal::TensorBlock<OutputScalar, Index, NumDims,
|
||||||
|
Layout>::Dimensions Dimensions;
|
||||||
|
typedef internal::TensorBlockCwiseBinaryOp<
|
||||||
|
functor_traits<BinaryFunctor>::PacketAccess>
|
||||||
|
TensorBlockCwiseBinaryOp;
|
||||||
|
|
||||||
|
struct BlockIteratorState {
|
||||||
|
Index output_stride, output_span;
|
||||||
|
Index left_stride, left_span;
|
||||||
|
Index right_stride, right_span;
|
||||||
|
Index size, count;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename LeftScalar, typename RightScalar>
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
|
||||||
|
const BinaryFunctor& functor, const Dimensions& block_sizes,
|
||||||
|
const Dimensions& block_strides, OutputScalar* output_data,
|
||||||
|
const array<Index, NumDims>& left_strides, const LeftScalar* left_data,
|
||||||
|
const array<Index, NumDims>& right_strides,
|
||||||
|
const RightScalar* right_data) {
|
||||||
|
// Find the innermost dimension whose size is not 1. This is the effective
// inner dim. If all dimensions are of size 1, fall back to using the actual
// innermost dim to avoid out-of-bound access.
|
||||||
|
int num_size_one_inner_dims = 0;
|
||||||
|
for (int i = 0; i < NumDims; ++i) {
|
||||||
|
const int dim = cond<Layout>()(i, NumDims - i - 1);
|
||||||
|
if (block_sizes[dim] != 1) {
|
||||||
|
num_size_one_inner_dims = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Calculate strides and dimensions.
|
||||||
|
const int inner_dim =
|
||||||
|
NumDims == 0 ? 1
|
||||||
|
: cond<Layout>()(num_size_one_inner_dims,
|
||||||
|
NumDims - num_size_one_inner_dims - 1);
|
||||||
|
Index inner_dim_size = NumDims == 0 ? 1 : block_sizes[inner_dim];
|
||||||
|
for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
|
||||||
|
const int dim = cond<Layout>()(i, NumDims - i - 1);
|
||||||
|
// Merge multiple inner dims into one for larger inner dim size (i.e.
|
||||||
|
// fewer calls to TensorBlockCwiseBinaryOp::Run()).
|
||||||
|
if (inner_dim_size == block_strides[dim] &&
|
||||||
|
block_strides[dim] == left_strides[dim] &&
|
||||||
|
block_strides[dim] == right_strides[dim]) {
|
||||||
|
inner_dim_size *= block_sizes[dim];
|
||||||
|
++num_size_one_inner_dims;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Index output_index = 0, left_index = 0, right_index = 0;
|
||||||
|
const Index output_stride = NumDims == 0 ? 1 : block_strides[inner_dim];
|
||||||
|
const Index left_stride = NumDims == 0 ? 1 : left_strides[inner_dim];
|
||||||
|
const Index right_stride = NumDims == 0 ? 1 : right_strides[inner_dim];
|
||||||
|
|
||||||
|
const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
|
||||||
|
array<BlockIteratorState, at_least_1_dim> block_iter_state;
|
||||||
|
|
||||||
|
// Initialize block iterator state. Squeeze away any dimension of size 1.
|
||||||
|
int num_squeezed_dims = 0;
|
||||||
|
for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
|
||||||
|
const int dim = cond<Layout>()(i + 1, NumDims - i - 2);
|
||||||
|
const Index size = block_sizes[dim];
|
||||||
|
if (size == 1) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto& state = block_iter_state[num_squeezed_dims];
|
||||||
|
state.output_stride = block_strides[dim];
|
||||||
|
state.left_stride = left_strides[dim];
|
||||||
|
state.right_stride = right_strides[dim];
|
||||||
|
state.size = size;
|
||||||
|
state.output_span = state.output_stride * (size - 1);
|
||||||
|
state.left_span = state.left_stride * (size - 1);
|
||||||
|
state.right_span = state.right_stride * (size - 1);
|
||||||
|
state.count = 0;
|
||||||
|
++num_squeezed_dims;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute cwise binary op.
|
||||||
|
const Index block_total_size = NumDims == 0 ? 1 : block_sizes.TotalSize();
|
||||||
|
for (Index i = 0; i < block_total_size; i += inner_dim_size) {
|
||||||
|
TensorBlockCwiseBinaryOp::Run(functor, inner_dim_size, output_index,
|
||||||
|
output_stride, output_data, left_index,
|
||||||
|
left_stride, left_data, right_index,
|
||||||
|
right_stride, right_data);
|
||||||
|
// Update index.
|
||||||
|
for (int j = 0; j < num_squeezed_dims; ++j) {
|
||||||
|
auto& state = block_iter_state[j];
|
||||||
|
if (++state.count < state.size) {
|
||||||
|
output_index += state.output_stride;
|
||||||
|
left_index += state.left_stride;
|
||||||
|
right_index += state.right_stride;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
state.count = 0;
|
||||||
|
output_index -= state.output_span;
|
||||||
|
left_index -= state.left_span;
|
||||||
|
right_index -= state.right_span;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
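// Usage sketch (not part of this header; modelled on the
// cxx11_tensor_block_access test further down): coefficient-wise sum of two
// 3x4 column-major blocks into an output block. Broadcasting an operand along
// a dimension is expressed by passing stride 0 for that dimension of that
// operand (see test_block_cwise_binary_io_zero_strides).
inline void BlockwiseSumSketch(const float* lhs, const float* rhs, float* out) {
  typedef scalar_sum_op<float> Sum;
  typedef TensorBlockCwiseBinaryIO<Sum, Index, float, 2, ColMajor> BinaryIO;

  DSizes<Index, 2> sizes(3, 4);
  DSizes<Index, 2> strides(1, 3);  // ColMajor strides of a 3x4 block

  BinaryIO::Run(Sum(), sizes, strides, out,  // output sizes, strides, data
                strides, lhs,                // left operand strides and data
                strides, rhs);               // right operand strides and data
}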
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \class TensorBlockMapper
|
* \class TensorBlockMapper
|
||||||
* \ingroup CXX11_Tensor_Module
|
* \ingroup CXX11_Tensor_Module
|
||||||
@ -90,7 +622,7 @@ class TensorBlock {
|
|||||||
*
|
*
|
||||||
* This class is responsible for iterating over the blocks of a tensor.
|
* This class is responsible for iterating over the blocks of a tensor.
|
||||||
*/
|
*/
|
||||||
template <typename Scalar, typename Index, std::size_t NumDims, int Layout>
|
template <typename Scalar, typename Index, int NumDims, int Layout>
|
||||||
class TensorBlockMapper {
|
class TensorBlockMapper {
|
||||||
public:
|
public:
|
||||||
typedef typename internal::TensorBlock<Scalar, Index, NumDims, Layout>
|
typedef typename internal::TensorBlock<Scalar, Index, NumDims, Layout>
|
||||||
@ -190,10 +722,6 @@ class TensorBlockMapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static int InnerDimIndex(Index i) {
|
|
||||||
return Layout == static_cast<int>(ColMajor) ? i : NumDims - i - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static Dimensions BlockDimensions(const Dimensions& tensor_dims,
|
static Dimensions BlockDimensions(const Dimensions& tensor_dims,
|
||||||
const TensorBlockShapeType block_shape,
|
const TensorBlockShapeType block_shape,
|
||||||
size_t min_target_size) {
|
size_t min_target_size) {
|
||||||
@ -228,7 +756,7 @@ class TensorBlockMapper {
|
|||||||
// Add any un-allocated coefficients to inner dimension(s).
|
// Add any un-allocated coefficients to inner dimension(s).
|
||||||
Index total_size = block_dim_sizes.TotalSize();
|
Index total_size = block_dim_sizes.TotalSize();
|
||||||
for (int i = 0; i < NumDims; ++i) {
|
for (int i = 0; i < NumDims; ++i) {
|
||||||
const int dim = InnerDimIndex(i);
|
const int dim = cond<Layout>()(i, NumDims - i - 1);
|
||||||
if (block_dim_sizes[dim] < tensor_dims[dim]) {
|
if (block_dim_sizes[dim] < tensor_dims[dim]) {
|
||||||
const Index total_size_other_dims =
|
const Index total_size_other_dims =
|
||||||
total_size / block_dim_sizes[dim];
|
total_size / block_dim_sizes[dim];
|
||||||
@ -245,7 +773,7 @@ class TensorBlockMapper {
|
|||||||
} else if (block_shape == TensorBlockShapeType::kSkewedInnerDims) {
|
} else if (block_shape == TensorBlockShapeType::kSkewedInnerDims) {
|
||||||
Index coeff_to_allocate = min_target_size;
|
Index coeff_to_allocate = min_target_size;
|
||||||
for (int i = 0; i < NumDims; ++i) {
|
for (int i = 0; i < NumDims; ++i) {
|
||||||
const int dim = InnerDimIndex(i);
|
const int dim = cond<Layout>()(i, NumDims - i - 1);
|
||||||
block_dim_sizes[dim] =
|
block_dim_sizes[dim] =
|
||||||
numext::mini(coeff_to_allocate, tensor_dims[dim]);
|
numext::mini(coeff_to_allocate, tensor_dims[dim]);
|
||||||
coeff_to_allocate =
|
coeff_to_allocate =
|
||||||
@ -284,7 +812,7 @@ class TensorBlockMapper {
|
|||||||
* processed together.
|
* processed together.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
template <typename Scalar, typename Index, std::size_t NumDims, int Layout>
|
template <typename Scalar, typename Index, int NumDims, int Layout>
|
||||||
class TensorSliceBlockMapper {
|
class TensorSliceBlockMapper {
|
||||||
public:
|
public:
|
||||||
typedef typename internal::TensorBlock<Scalar, Index, NumDims, Layout>
|
typedef typename internal::TensorBlock<Scalar, Index, NumDims, Layout>
|
||||||
@ -360,7 +888,7 @@ class TensorSliceBlockMapper {
|
|||||||
prev_dim = curr_dim;
|
prev_dim = curr_dim;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < static_cast<int>(NumDims) - 1; ++i) {
|
for (int i = 0; i < NumDims - 1; ++i) {
|
||||||
const Index idx = block_index / m_block_strides[i];
|
const Index idx = block_index / m_block_strides[i];
|
||||||
coords[i] = m_tensor_slice_offsets[i] + idx * m_block_dim_sizes[i];
|
coords[i] = m_tensor_slice_offsets[i] + idx * m_block_dim_sizes[i];
|
||||||
sizes[i] = numext::mini(
|
sizes[i] = numext::mini(
|
||||||
|
@ -19,11 +19,33 @@ using Eigen::Index;
|
|||||||
using Eigen::RowMajor;
|
using Eigen::RowMajor;
|
||||||
using Eigen::ColMajor;
|
using Eigen::ColMajor;
|
||||||
|
|
||||||
|
using internal::TensorBlockShapeType;
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static const T& choose(int layout, const T& col, const T& row) {
|
static const T& choose(int layout, const T& col, const T& row) {
|
||||||
return layout == ColMajor ? col : row;
|
return layout == ColMajor ? col : row;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const TensorBlockShapeType RandomShape() {
|
||||||
|
return internal::random<bool>()
|
||||||
|
? internal::TensorBlockShapeType::kUniformAllDims
|
||||||
|
: internal::TensorBlockShapeType::kSkewedInnerDims;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int NumDims>
|
||||||
|
static std::size_t RandomTargetSize(const DSizes<Index, NumDims>& dims) {
|
||||||
|
return internal::random<int>(1, dims.TotalSize());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static T* GenerateRandomData(const Index& size) {
|
||||||
|
T* data = new T[size];
|
||||||
|
for (int i = 0; i < size; ++i) {
|
||||||
|
data[i] = internal::random<T>();
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
template <int Layout>
|
template <int Layout>
|
||||||
static void test_block_mapper_sanity()
|
static void test_block_mapper_sanity()
|
||||||
{
|
{
|
||||||
@ -75,9 +97,7 @@ static void test_block_mapper_sanity()
|
|||||||
template <typename T, int Layout, int NumDims>
|
template <typename T, int Layout, int NumDims>
|
||||||
static void UpdateCoeffSet(
|
static void UpdateCoeffSet(
|
||||||
const internal::TensorBlock<T, Index, 4, Layout>& block,
|
const internal::TensorBlock<T, Index, 4, Layout>& block,
|
||||||
Index first_coeff_index,
|
Index first_coeff_index, int dim_index, std::set<Index>* visited_coeffs) {
|
||||||
int dim_index,
|
|
||||||
std::set<Index>* visited_coeffs) {
|
|
||||||
const DSizes<Index, NumDims> block_sizes = block.block_sizes();
|
const DSizes<Index, NumDims> block_sizes = block.block_sizes();
|
||||||
const DSizes<Index, NumDims> tensor_strides = block.tensor_strides();
|
const DSizes<Index, NumDims> tensor_strides = block.tensor_strides();
|
||||||
|
|
||||||
@ -103,18 +123,11 @@ static void test_block_mapper_maps_every_element()
|
|||||||
|
|
||||||
DSizes<Index, 4> dims(5, 7, 11, 17);
|
DSizes<Index, 4> dims(5, 7, 11, 17);
|
||||||
|
|
||||||
auto total_coeffs = static_cast<int>(dims.TotalSize());
|
|
||||||
|
|
||||||
// Keep track of element indices available via block access.
|
||||||
std::set<Index> coeff_set;
|
std::set<Index> coeff_set;
|
||||||
|
|
||||||
// Try different combinations of block types and sizes.
|
// Try different combinations of block types and sizes.
|
||||||
auto block_shape_type =
|
TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims));
|
||||||
internal::random<bool>()
|
|
||||||
? internal::TensorBlockShapeType::kUniformAllDims
|
|
||||||
: internal::TensorBlockShapeType::kSkewedInnerDims;
|
|
||||||
auto block_target_size = internal::random<int>(1, total_coeffs);
|
|
||||||
TensorBlockMapper block_mapper(dims, block_shape_type, block_target_size);
|
|
||||||
|
|
||||||
for (int i = 0; i < block_mapper.total_block_count(); ++i) {
|
for (int i = 0; i < block_mapper.total_block_count(); ++i) {
|
||||||
TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr);
|
TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr);
|
||||||
@ -124,6 +137,7 @@ static void test_block_mapper_maps_every_element()
|
|||||||
|
|
||||||
// Verify that every coefficient in the original Tensor is accessible through
|
// Verify that every coefficient in the original Tensor is accessible through
|
||||||
// TensorBlock only once.
|
// TensorBlock only once.
|
||||||
|
auto total_coeffs = static_cast<int>(dims.TotalSize());
|
||||||
VERIFY_IS_EQUAL(coeff_set.size(), total_coeffs);
|
VERIFY_IS_EQUAL(coeff_set.size(), total_coeffs);
|
||||||
VERIFY_IS_EQUAL(*coeff_set.begin(), static_cast<Index>(0));
|
VERIFY_IS_EQUAL(*coeff_set.begin(), static_cast<Index>(0));
|
||||||
VERIFY_IS_EQUAL(*coeff_set.rbegin(), static_cast<Index>(total_coeffs - 1));
|
VERIFY_IS_EQUAL(*coeff_set.rbegin(), static_cast<Index>(total_coeffs - 1));
|
||||||
@ -146,13 +160,6 @@ static void test_slice_block_mapper_maps_every_element()
|
|||||||
|
|
||||||
auto total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize());
|
auto total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize());
|
||||||
|
|
||||||
// Try different combinations of block types and sizes.
|
|
||||||
auto block_shape_type =
|
|
||||||
internal::random<bool>()
|
|
||||||
? internal::TensorBlockShapeType::kUniformAllDims
|
|
||||||
: internal::TensorBlockShapeType::kSkewedInnerDims;
|
|
||||||
auto block_target_size = internal::random<int>(1, total_coeffs);
|
|
||||||
|
|
||||||
// Pick random dimension sizes for the tensor blocks.
|
||||||
DSizes<Index, 4> block_sizes;
|
DSizes<Index, 4> block_sizes;
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
@ -164,7 +171,7 @@ static void test_slice_block_mapper_maps_every_element()
|
|||||||
DimensionList<Index, 4>());
|
DimensionList<Index, 4>());
|
||||||
|
|
||||||
for (int i = 0; i < block_mapper.total_block_count(); ++i) {
|
for (int i = 0; i < block_mapper.total_block_count(); ++i) {
|
||||||
TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
|
TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr);
|
||||||
UpdateCoeffSet<T, Layout, 4>(block, block.first_coeff_index(),
|
UpdateCoeffSet<T, Layout, 4>(block, block.first_coeff_index(),
|
||||||
choose(Layout, 3, 0), &coeff_set);
|
choose(Layout, 3, 0), &coeff_set);
|
||||||
}
|
}
|
||||||
@ -172,11 +179,745 @@ static void test_slice_block_mapper_maps_every_element()
|
|||||||
VERIFY_IS_EQUAL(coeff_set.size(), total_coeffs);
|
VERIFY_IS_EQUAL(coeff_set.size(), total_coeffs);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DECLARE_TEST(cxx11_tensor_assign) {
|
template <int Layout>
|
||||||
CALL_SUBTEST(test_block_mapper_sanity<ColMajor>());
|
static void test_block_io_copy_data_from_source_to_target()
|
||||||
CALL_SUBTEST(test_block_mapper_sanity<RowMajor>());
|
{
|
||||||
CALL_SUBTEST(test_block_mapper_maps_every_element<ColMajor>());
|
using T = float;
|
||||||
CALL_SUBTEST(test_block_mapper_maps_every_element<RowMajor>());
|
|
||||||
CALL_SUBTEST(test_slice_block_mapper_maps_every_element<ColMajor>());
|
typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock;
|
||||||
CALL_SUBTEST(test_slice_block_mapper_maps_every_element<RowMajor>());
|
typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
|
||||||
|
|
||||||
|
typedef internal::TensorBlockReader<T, Index, 5, Layout, true>
|
||||||
|
TensorBlockReader;
|
||||||
|
typedef internal::TensorBlockWriter<T, Index, 5, Layout, true>
|
||||||
|
TensorBlockWriter;
|
||||||
|
|
||||||
|
typedef std::vector<T, aligned_allocator<T>> DataVector;
|
||||||
|
|
||||||
|
DSizes<Index, 5> input_tensor_dims(5, 7, 11, 17, 3);
|
||||||
|
const auto input_tensor_size = input_tensor_dims.TotalSize();
|
||||||
|
DataVector input_data(input_tensor_size, 0);
|
||||||
|
for (int i = 0; i < input_tensor_size; ++i) {
|
||||||
|
input_data[i] = internal::random<T>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DataVector output_data(input_tensor_size, 0);
|
||||||
|
|
||||||
|
TensorBlockMapper block_mapper(input_tensor_dims, RandomShape(),
|
||||||
|
RandomTargetSize(input_tensor_dims));
|
||||||
|
|
||||||
|
DataVector block_data(block_mapper.block_dims_total_size(), 0);
|
||||||
|
for (int i = 0; i < block_mapper.total_block_count(); ++i) {
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(i, block_data.data());
|
||||||
|
TensorBlockReader::Run(&block, input_data.data());
|
||||||
|
TensorBlockWriter::Run(block, output_data.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < input_tensor_size; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(input_data[i], output_data[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int Layout, int NumDims>
|
||||||
|
static int GetInputIndex(Index output_index,
|
||||||
|
const array<Index, NumDims>& output_to_input_dim_map,
|
||||||
|
const array<Index, NumDims>& input_strides,
|
||||||
|
const array<Index, NumDims>& output_strides) {
|
||||||
|
int input_index = 0;
|
||||||
|
if (Layout == ColMajor) {
|
||||||
|
for (int i = NumDims - 1; i > 0; --i) {
|
||||||
|
const int idx = output_index / output_strides[i];
|
||||||
|
input_index += idx * input_strides[output_to_input_dim_map[i]];
|
||||||
|
output_index -= idx * output_strides[i];
|
||||||
|
}
|
||||||
|
return input_index +
|
||||||
|
output_index * input_strides[output_to_input_dim_map[0]];
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < NumDims - 1; ++i) {
|
||||||
|
const int idx = output_index / output_strides[i];
|
||||||
|
input_index += idx * input_strides[output_to_input_dim_map[i]];
|
||||||
|
output_index -= idx * output_strides[i];
|
||||||
|
}
|
||||||
|
return input_index +
|
||||||
|
output_index * input_strides[output_to_input_dim_map[NumDims - 1]];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int Layout, int NumDims>
|
||||||
|
static array<Index, NumDims> ComputeStrides(
|
||||||
|
const array<Index, NumDims>& sizes) {
|
||||||
|
array<Index, NumDims> strides;
|
||||||
|
if (Layout == ColMajor) {
|
||||||
|
strides[0] = 1;
|
||||||
|
for (int i = 1; i < NumDims; ++i) {
|
||||||
|
strides[i] = strides[i - 1] * sizes[i - 1];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
strides[NumDims - 1] = 1;
|
||||||
|
for (int i = NumDims - 2; i >= 0; --i) {
|
||||||
|
strides[i] = strides[i + 1] * sizes[i + 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return strides;
|
||||||
|
}
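// Worked example for the two helpers above (hypothetical values, not part of
// the test suite): for a column-major 2x3x4 tensor, ComputeStrides returns
// {1, 2, 6}. With the shuffle output_to_input_dim_map = {2, 0, 1} (output dim
// i comes from input dim map[i]), the output tensor has sizes {4, 2, 3} and
// strides {1, 4, 8}, and GetInputIndex maps the linear output index 21
// (output coordinate (1, 1, 2), i.e. input coordinate (1, 2, 1)) to the
// linear input index 11.
static void StrideHelpersExample() {
  array<Index, 3> sizes;
  sizes[0] = 2; sizes[1] = 3; sizes[2] = 4;
  const array<Index, 3> input_strides = ComputeStrides<ColMajor, 3>(sizes);

  array<Index, 3> output_sizes;
  output_sizes[0] = 4; output_sizes[1] = 2; output_sizes[2] = 3;
  const array<Index, 3> output_strides =
      ComputeStrides<ColMajor, 3>(output_sizes);

  array<Index, 3> output_to_input_dim_map;
  output_to_input_dim_map[0] = 2;
  output_to_input_dim_map[1] = 0;
  output_to_input_dim_map[2] = 1;

  const int input_index = GetInputIndex<ColMajor, 3>(
      21, output_to_input_dim_map, input_strides, output_strides);
  VERIFY_IS_EQUAL(input_index, 11);
}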
|
||||||
|
|
||||||
|
template <int Layout>
|
||||||
|
static void test_block_io_copy_using_reordered_dimensions() {
|
||||||
|
typedef internal::TensorBlock<float, Index, 5, Layout> TensorBlock;
|
||||||
|
typedef internal::TensorBlockMapper<float, Index, 5, Layout>
|
||||||
|
TensorBlockMapper;
|
||||||
|
|
||||||
|
typedef internal::TensorBlockReader<float, Index, 5, Layout, false>
|
||||||
|
TensorBlockReader;
|
||||||
|
typedef internal::TensorBlockWriter<float, Index, 5, Layout, false>
|
||||||
|
TensorBlockWriter;
|
||||||
|
|
||||||
|
DSizes<Index, 5> input_tensor_dims(5, 7, 11, 17, 3);
|
||||||
|
const auto input_tensor_size = input_tensor_dims.TotalSize();
|
||||||
|
|
||||||
|
// Create a random input tensor.
|
||||||
|
auto* input_data = GenerateRandomData<float>(input_tensor_size);
|
||||||
|
|
||||||
|
// Create a random dimension re-ordering/shuffle.
|
||||||
|
std::vector<Index> shuffle = {0, 1, 2, 3, 4};
|
||||||
|
std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
|
||||||
|
|
||||||
|
DSizes<Index, 5> output_tensor_dims;
|
||||||
|
array<Index, 5> input_to_output_dim_map;
|
||||||
|
array<Index, 5> output_to_input_dim_map;
|
||||||
|
for (Index i = 0; i < 5; ++i) {
|
||||||
|
output_tensor_dims[shuffle[i]] = input_tensor_dims[i];
|
||||||
|
input_to_output_dim_map[i] = shuffle[i];
|
||||||
|
output_to_input_dim_map[shuffle[i]] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Random block shape and size.
|
||||||
|
TensorBlockMapper block_mapper(output_tensor_dims, RandomShape(),
|
||||||
|
RandomTargetSize(input_tensor_dims));
|
||||||
|
|
||||||
|
auto* block_data = new float[block_mapper.block_dims_total_size()];
|
||||||
|
auto* output_data = new float[input_tensor_size];
|
||||||
|
|
||||||
|
array<Index, 5> input_tensor_strides =
|
||||||
|
ComputeStrides<Layout, 5>(input_tensor_dims);
|
||||||
|
array<Index, 5> output_tensor_strides =
|
||||||
|
ComputeStrides<Layout, 5>(output_tensor_dims);
|
||||||
|
|
||||||
|
for (Index i = 0; i < block_mapper.total_block_count(); ++i) {
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(i, block_data);
|
||||||
|
const Index first_coeff_index = GetInputIndex<Layout, 5>(
|
||||||
|
block.first_coeff_index(), output_to_input_dim_map,
|
||||||
|
input_tensor_strides, output_tensor_strides);
|
||||||
|
TensorBlockReader::Run(&block, first_coeff_index, input_to_output_dim_map,
|
||||||
|
input_tensor_strides, input_data);
|
||||||
|
TensorBlockWriter::Run(block, first_coeff_index, input_to_output_dim_map,
|
||||||
|
input_tensor_strides, output_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < input_tensor_size; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(input_data[i], output_data[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] input_data;
|
||||||
|
delete[] block_data;
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int Layout>
|
||||||
|
static void test_block_io_zero_stride()
|
||||||
|
{
|
||||||
|
typedef internal::TensorBlock<float, Index, 5, Layout> TensorBlock;
|
||||||
|
typedef internal::TensorBlockReader<float, Index, 5, Layout, true>
|
||||||
|
TensorBlockReader;
|
||||||
|
typedef internal::TensorBlockWriter<float, Index, 5, Layout, true>
|
||||||
|
TensorBlockWriter;
|
||||||
|
|
||||||
|
DSizes<Index, 5> input_tensor_dims(1, 2, 1, 3, 1);
|
||||||
|
const auto input_tensor_size = input_tensor_dims.TotalSize();
|
||||||
|
|
||||||
|
// Create a random input tensor.
|
||||||
|
auto* input_data = GenerateRandomData<float>(input_tensor_size);
|
||||||
|
|
||||||
|
DSizes<Index, 5> output_tensor_dims(3, 2, 3, 3, 2);
|
||||||
|
|
||||||
|
DSizes<Index, 5> input_tensor_strides(
|
||||||
|
ComputeStrides<Layout, 5>(input_tensor_dims));
|
||||||
|
DSizes<Index, 5> output_tensor_strides(
|
||||||
|
ComputeStrides<Layout, 5>(output_tensor_dims));
|
||||||
|
|
||||||
|
DSizes<Index, 5> input_tensor_strides_with_zeros(input_tensor_strides);
|
||||||
|
input_tensor_strides_with_zeros[0] = 0;
|
||||||
|
input_tensor_strides_with_zeros[2] = 0;
|
||||||
|
input_tensor_strides_with_zeros[4] = 0;
|
||||||
|
|
||||||
|
// Verify that data was correctly read/written from/into the block.
|
||||||
|
const auto verify_is_equal = [&](const float* output_data) {
|
||||||
|
for (int i = 0; i < output_tensor_dims[0]; ++i) {
|
||||||
|
for (int j = 0; j < output_tensor_dims[1]; ++j) {
|
||||||
|
for (int k = 0; k < output_tensor_dims[2]; ++k) {
|
||||||
|
for (int l = 0; l < output_tensor_dims[3]; ++l) {
|
||||||
|
for (int m = 0; m < output_tensor_dims[4]; ++m) {
|
||||||
|
const Index output_offset =
|
||||||
|
i * output_tensor_strides[0] + j * output_tensor_strides[1] +
|
||||||
|
k * output_tensor_strides[2] + l * output_tensor_strides[3] +
|
||||||
|
m * output_tensor_strides[4];
|
||||||
|
const Index input_offset =
|
||||||
|
i % input_tensor_dims[0] * input_tensor_strides[0] +
|
||||||
|
j % input_tensor_dims[1] * input_tensor_strides[1] +
|
||||||
|
k % input_tensor_dims[2] * input_tensor_strides[2] +
|
||||||
|
l % input_tensor_dims[3] * input_tensor_strides[3] +
|
||||||
|
m % input_tensor_dims[4] * input_tensor_strides[4];
|
||||||
|
VERIFY_IS_EQUAL(output_data[output_offset],
|
||||||
|
input_data[input_offset]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
{
|
||||||
|
auto* output_data = new float[output_tensor_dims.TotalSize()];
|
||||||
|
TensorBlock read_block(0, output_tensor_dims, output_tensor_strides,
|
||||||
|
input_tensor_strides_with_zeros, output_data);
|
||||||
|
TensorBlockReader::Run(&read_block, input_data);
|
||||||
|
verify_is_equal(output_data);
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto* output_data = new float[output_tensor_dims.TotalSize()];
|
||||||
|
TensorBlock write_block(0, output_tensor_dims,
|
||||||
|
input_tensor_strides_with_zeros,
|
||||||
|
output_tensor_strides, input_data);
|
||||||
|
TensorBlockWriter::Run(write_block, output_data);
|
||||||
|
verify_is_equal(output_data);
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] input_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int Layout>
|
||||||
|
static void test_block_io_squeeze_ones() {
|
||||||
|
typedef internal::TensorBlock<float, Index, 5, Layout> TensorBlock;
|
||||||
|
typedef internal::TensorBlockReader<float, Index, 5, Layout, true>
|
||||||
|
TensorBlockReader;
|
||||||
|
typedef internal::TensorBlockWriter<float, Index, 5, Layout, true>
|
||||||
|
TensorBlockWriter;
|
||||||
|
|
||||||
|
// Total size > 1.
|
||||||
|
{
|
||||||
|
DSizes<Index, 5> block_sizes(1, 2, 1, 2, 1);
|
||||||
|
const auto total_size = block_sizes.TotalSize();
|
||||||
|
|
||||||
|
// Create a random input tensor.
|
||||||
|
auto* input_data = GenerateRandomData<float>(total_size);
|
||||||
|
DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
|
||||||
|
|
||||||
|
{
|
||||||
|
auto* output_data = new float[block_sizes.TotalSize()];
|
||||||
|
TensorBlock read_block(0, block_sizes, strides, strides, output_data);
|
||||||
|
TensorBlockReader::Run(&read_block, input_data);
|
||||||
|
for (int i = 0; i < total_size; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(output_data[i], input_data[i]);
|
||||||
|
}
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto* output_data = new float[block_sizes.TotalSize()];
|
||||||
|
TensorBlock write_block(0, block_sizes, strides, strides, input_data);
|
||||||
|
TensorBlockWriter::Run(write_block, output_data);
|
||||||
|
for (int i = 0; i < total_size; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(output_data[i], input_data[i]);
|
||||||
|
}
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Total size == 1.
|
||||||
|
{
|
||||||
|
DSizes<Index, 5> block_sizes(1, 1, 1, 1, 1);
|
||||||
|
const auto total_size = block_sizes.TotalSize();
|
||||||
|
|
||||||
|
// Create a random input tensor.
|
||||||
|
auto* input_data = GenerateRandomData<float>(total_size);
|
||||||
|
DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
|
||||||
|
|
||||||
|
{
|
||||||
|
auto* output_data = new float[block_sizes.TotalSize()];
|
||||||
|
TensorBlock read_block(0, block_sizes, strides, strides, output_data);
|
||||||
|
TensorBlockReader::Run(&read_block, input_data);
|
||||||
|
for (int i = 0; i < total_size; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(output_data[i], input_data[i]);
|
||||||
|
}
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto* output_data = new float[block_sizes.TotalSize()];
|
||||||
|
TensorBlock write_block(0, block_sizes, strides, strides, input_data);
|
||||||
|
TensorBlockWriter::Run(write_block, output_data);
|
||||||
|
for (int i = 0; i < total_size; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(output_data[i], input_data[i]);
|
||||||
|
}
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int Layout>
|
||||||
|
static void test_block_cwise_binary_io_basic() {
|
||||||
|
typedef internal::scalar_sum_op<float> BinaryFunctor;
|
||||||
|
typedef internal::TensorBlockCwiseBinaryIO<BinaryFunctor, Index, float, 5,
|
||||||
|
Layout>
|
||||||
|
TensorBlockCwiseBinaryIO;
|
||||||
|
|
||||||
|
DSizes<Index, 5> block_sizes(2, 3, 5, 7, 11);
|
||||||
|
DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
|
||||||
|
|
||||||
|
const auto total_size = block_sizes.TotalSize();
|
||||||
|
|
||||||
|
// Create random input tensors.
|
||||||
|
auto* left_data = GenerateRandomData<float>(total_size);
|
||||||
|
auto* right_data = GenerateRandomData<float>(total_size);
|
||||||
|
|
||||||
|
auto* output_data = new float[total_size];
|
||||||
|
BinaryFunctor functor;
|
||||||
|
TensorBlockCwiseBinaryIO::Run(functor, block_sizes, strides, output_data,
|
||||||
|
strides, left_data, strides, right_data);
|
||||||
|
for (int i = 0; i < total_size; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(output_data[i], functor(left_data[i], right_data[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] left_data;
|
||||||
|
delete[] right_data;
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int Layout>
|
||||||
|
static void test_block_cwise_binary_io_squeeze_ones() {
|
||||||
|
typedef internal::scalar_sum_op<float> BinaryFunctor;
|
||||||
|
typedef internal::TensorBlockCwiseBinaryIO<BinaryFunctor, Index, float, 5,
|
||||||
|
Layout>
|
||||||
|
TensorBlockCwiseBinaryIO;
|
||||||
|
|
||||||
|
DSizes<Index, 5> block_sizes(1, 2, 1, 3, 1);
|
||||||
|
DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
|
||||||
|
|
||||||
|
const auto total_size = block_sizes.TotalSize();
|
||||||
|
|
||||||
|
// Create random input tensors.
|
||||||
|
auto* left_data = GenerateRandomData<float>(total_size);
|
||||||
|
auto* right_data = GenerateRandomData<float>(total_size);
|
||||||
|
|
||||||
|
auto* output_data = new float[total_size];
|
||||||
|
BinaryFunctor functor;
|
||||||
|
TensorBlockCwiseBinaryIO::Run(functor, block_sizes, strides, output_data,
|
||||||
|
strides, left_data, strides, right_data);
|
||||||
|
for (int i = 0; i < total_size; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(output_data[i], functor(left_data[i], right_data[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] left_data;
|
||||||
|
delete[] right_data;
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int Layout>
|
||||||
|
static void test_block_cwise_binary_io_zero_strides() {
|
||||||
|
typedef internal::scalar_sum_op<float> BinaryFunctor;
|
||||||
|
typedef internal::TensorBlockCwiseBinaryIO<BinaryFunctor, Index, float, 5,
|
||||||
|
Layout>
|
||||||
|
TensorBlockCwiseBinaryIO;
|
||||||
|
|
||||||
|
DSizes<Index, 5> left_sizes(1, 3, 1, 7, 1);
|
||||||
|
DSizes<Index, 5> left_strides(ComputeStrides<Layout, 5>(left_sizes));
|
||||||
|
left_strides[0] = 0;
|
||||||
|
left_strides[2] = 0;
|
||||||
|
left_strides[4] = 0;
|
||||||
|
|
||||||
|
DSizes<Index, 5> right_sizes(2, 1, 5, 1, 11);
|
||||||
|
DSizes<Index, 5> right_strides(ComputeStrides<Layout, 5>(right_sizes));
|
||||||
|
right_strides[1] = 0;
|
||||||
|
right_strides[3] = 0;
|
||||||
|
|
||||||
|
// Generate random data.
|
||||||
|
auto* left_data = GenerateRandomData<float>(left_sizes.TotalSize());
|
||||||
|
auto* right_data = GenerateRandomData<float>(right_sizes.TotalSize());
|
||||||
|
|
||||||
|
DSizes<Index, 5> output_sizes(2, 3, 5, 7, 11);
|
||||||
|
DSizes<Index, 5> output_strides(ComputeStrides<Layout, 5>(output_sizes));
|
||||||
|
|
||||||
|
const auto output_total_size = output_sizes.TotalSize();
|
||||||
|
auto* output_data = new float[output_total_size];
|
||||||
|
|
||||||
|
BinaryFunctor functor;
|
||||||
|
TensorBlockCwiseBinaryIO::Run(functor, output_sizes, output_strides,
|
||||||
|
output_data, left_strides, left_data,
|
||||||
|
right_strides, right_data);
|
||||||
|
for (int i = 0; i < 2; ++i) {
|
||||||
|
for (int j = 0; j < 3; ++j) {
|
||||||
|
for (int k = 0; k < 5; ++k) {
|
||||||
|
for (int l = 0; l < 7; ++l) {
|
||||||
|
for (int m = 0; m < 11; ++m) {
|
||||||
|
Index output_index = i * output_strides[0] + j * output_strides[1] +
|
||||||
|
k * output_strides[2] + l * output_strides[3] +
|
||||||
|
m * output_strides[4];
|
||||||
|
Index left_index = i * left_strides[0] + j * left_strides[1] +
|
||||||
|
k * left_strides[2] + l * left_strides[3] +
|
||||||
|
m * left_strides[4];
|
||||||
|
Index right_index = i * right_strides[0] + j * right_strides[1] +
|
||||||
|
k * right_strides[2] + l * right_strides[3] +
|
||||||
|
m * right_strides[4];
|
||||||
|
VERIFY_IS_EQUAL(
|
||||||
|
output_data[output_index],
|
||||||
|
functor(left_data[left_index], right_data[right_index]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] left_data;
|
||||||
|
delete[] right_data;
|
||||||
|
delete[] output_data;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int Layout>
|
||||||
|
static void test_uniform_block_shape()
|
||||||
|
{
|
||||||
|
using T = int;
|
||||||
|
typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock;
|
||||||
|
typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
|
||||||
|
|
||||||
|
{
|
||||||
|
// Test shape 'UniformAllDims' with uniform 'max_coeff_count'.
|
||||||
|
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
|
||||||
|
const size_t max_coeff_count = 5 * 5 * 5 * 5 * 5;
|
||||||
|
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
|
||||||
|
max_coeff_count);
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
|
||||||
|
for (int i = 0; i < 5; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
|
||||||
|
}
|
||||||
|
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test shape 'UniformAllDims' with larger 'max_coeff_count' which spills
|
||||||
|
// partially into first inner-most dimension.
|
||||||
|
if (Layout == ColMajor) {
|
||||||
|
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
|
||||||
|
const size_t max_coeff_count = 7 * 5 * 5 * 5 * 5;
|
||||||
|
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
|
||||||
|
max_coeff_count);
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
|
||||||
|
VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
|
||||||
|
for (int i = 1; i < 5; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
|
||||||
|
}
|
||||||
|
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
|
||||||
|
} else {
|
||||||
|
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
|
||||||
|
const size_t max_coeff_count = 5 * 5 * 5 * 5 * 6;
|
||||||
|
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
|
||||||
|
max_coeff_count);
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
|
||||||
|
VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
|
||||||
|
for (int i = 3; i >= 0; --i) {
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
|
||||||
|
}
|
||||||
|
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test shape 'UniformAllDims' with larger 'max_coeff_count' which spills
|
||||||
|
// fully into first inner-most dimension.
|
||||||
|
if (Layout == ColMajor) {
|
||||||
|
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
|
||||||
|
const size_t max_coeff_count = 11 * 5 * 5 * 5 * 5;
|
||||||
|
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
|
||||||
|
max_coeff_count);
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
|
||||||
|
VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
|
||||||
|
for (int i = 1; i < 5; ++i) {
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
|
||||||
|
}
|
||||||
|
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
|
||||||
|
} else {
|
||||||
|
DSizes<Index, 5> dims(11, 5, 6, 17, 7);
|
||||||
|
const size_t max_coeff_count = 5 * 5 * 5 * 5 * 7;
|
||||||
|
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
|
||||||
|
max_coeff_count);
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
|
||||||
|
VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
|
||||||
|
for (int i = 3; i >= 0; --i) {
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
|
||||||
|
}
|
||||||
|
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test shape 'UniformAllDims' with larger 'max_coeff_count' which spills
|
||||||
|
// fully into first few inner-most dimensions.
|
||||||
|
if (Layout == ColMajor) {
|
||||||
|
DSizes<Index, 5> dims(7, 5, 6, 17, 7);
|
||||||
|
const size_t max_coeff_count = 7 * 5 * 6 * 7 * 5;
|
||||||
|
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
|
||||||
|
max_coeff_count);
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
|
||||||
|
VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
|
||||||
|
VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
|
||||||
|
VERIFY_IS_EQUAL(7, block.block_sizes()[3]);
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[4]);
|
||||||
|
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
|
||||||
|
} else {
|
||||||
|
DSizes<Index, 5> dims(7, 5, 6, 9, 7);
|
||||||
|
const size_t max_coeff_count = 5 * 5 * 5 * 6 * 7;
|
||||||
|
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
|
||||||
|
max_coeff_count);
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
|
||||||
|
VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
|
||||||
|
VERIFY_IS_EQUAL(6, block.block_sizes()[3]);
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[0]);
|
||||||
|
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test shape 'UniformAllDims' with full allocation to all dims.
|
||||||
|
if (Layout == ColMajor) {
|
||||||
|
DSizes<Index, 5> dims(7, 5, 6, 17, 7);
|
||||||
|
const size_t max_coeff_count = 7 * 5 * 6 * 17 * 7;
|
||||||
|
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
|
||||||
|
max_coeff_count);
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
|
||||||
|
VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
|
||||||
|
VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
|
||||||
|
VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
|
||||||
|
VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
|
||||||
|
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
|
||||||
|
} else {
|
||||||
|
DSizes<Index, 5> dims(7, 5, 6, 9, 7);
|
||||||
|
const size_t max_coeff_count = 7 * 5 * 6 * 9 * 7;
|
||||||
|
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
|
||||||
|
max_coeff_count);
|
||||||
|
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
|
||||||
|
VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
|
||||||
|
VERIFY_IS_EQUAL(9, block.block_sizes()[3]);
|
||||||
|
VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
|
||||||
|
VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
|
||||||
|
VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
|
||||||
|
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
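// Editorial note: the shape tests above only inspect block 0. Below is a
// minimal, hypothetical sketch (not part of the original test suite) of
// walking every block with the same mapper API used above; it relies only on
// total_block_count(), GetBlockForIndex() and block_sizes(), all of which are
// exercised elsewhere in this file.
template <int Layout>
static void example_visit_all_blocks_sketch() {
  typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
  typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;

  DSizes<Index, 5> dims(11, 5, 6, 17, 7);
  const size_t max_coeff_count = 100;
  TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
                                 max_coeff_count);
  for (Index i = 0; i < block_mapper.total_block_count(); ++i) {
    // Passing nullptr for the data pointer, as the tests above do; only the
    // block sizes are inspected here, and no block exceeds the requested
    // coefficient budget.
    TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr);
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  }
}
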
template <int Layout>
static void test_skewed_inner_dim_block_shape()
{
  using T = int;
  typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock;
  typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;

  // Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
  if (Layout == ColMajor) {
    DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const size_t max_coeff_count = 10 * 1 * 1 * 1 * 1;
    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
                                   max_coeff_count);
    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
    VERIFY_IS_EQUAL(10, block.block_sizes()[0]);
    for (int i = 1; i < 5; ++i) {
      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
    }
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  } else {
    DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const size_t max_coeff_count = 1 * 1 * 1 * 1 * 6;
    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
                                   max_coeff_count);
    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
    VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
    for (int i = 3; i >= 0; --i) {
      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
    }
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  }

  // Test shape 'SkewedInnerDims' with full allocation to inner-most dim.
  if (Layout == ColMajor) {
    DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const size_t max_coeff_count = 11 * 1 * 1 * 1 * 1;
    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
                                   max_coeff_count);
    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
    for (int i = 1; i < 5; ++i) {
      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
    }
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  } else {
    DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const size_t max_coeff_count = 1 * 1 * 1 * 1 * 7;
    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
                                   max_coeff_count);
    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
    for (int i = 3; i >= 0; --i) {
      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
    }
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  }

  // Test shape 'SkewedInnerDims' with full allocation to inner-most dim,
  // and partial allocation to second inner-dim.
  if (Layout == ColMajor) {
    DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const size_t max_coeff_count = 11 * 3 * 1 * 1 * 1;
    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
                                   max_coeff_count);
    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
    VERIFY_IS_EQUAL(3, block.block_sizes()[1]);
    for (int i = 2; i < 5; ++i) {
      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
    }
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  } else {
    DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const size_t max_coeff_count = 1 * 1 * 1 * 15 * 7;
    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
                                   max_coeff_count);
    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
    VERIFY_IS_EQUAL(15, block.block_sizes()[3]);
    for (int i = 2; i >= 0; --i) {
      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
    }
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  }

  // Test shape 'SkewedInnerDims' with full allocation to inner-most dim,
  // and partial allocation to third inner-dim.
  if (Layout == ColMajor) {
    DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const size_t max_coeff_count = 11 * 5 * 5 * 1 * 1;
    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
                                   max_coeff_count);
    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
    VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
    VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
    for (int i = 3; i < 5; ++i) {
      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
    }
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  } else {
    DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const size_t max_coeff_count = 1 * 1 * 5 * 17 * 7;
    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
                                   max_coeff_count);
    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
    VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
    VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
    for (int i = 1; i >= 0; --i) {
      VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
    }
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  }

  // Test shape 'SkewedInnerDims' with full allocation to all dims.
  if (Layout == ColMajor) {
    DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
                                   max_coeff_count);
    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
    VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
    VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
    VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  } else {
    DSizes<Index, 5> dims(11, 5, 6, 17, 7);
    const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
                                   max_coeff_count);
    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
    VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
    VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
    VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
    VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
    VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
    VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
  }
}

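// Editorial note: taken together, the expectations in test_uniform_block_shape
// and test_skewed_inner_dim_block_shape illustrate the intended difference
// between the two block shapes: 'UniformAllDims' spreads the coefficient
// budget roughly evenly across all dimensions, while 'SkewedInnerDims' gives
// the inner-most dimension (the first one for ColMajor, the last one for
// RowMajor) as much of the budget as possible before spilling into the next
// dimension outward.
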
template <int Layout>
static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
{
  using T = int;

  // Test blocking of tensors with zero dimensions:
  //  - we must not crash on asserts and divisions by zero
  //  - we must not return a block with zero dimensions
  //    (recipe for overflows/underflows, divisions by zero and NaNs later)
  //  - total block count must be zero
  {
    typedef internal::TensorBlockMapper<T, Index, 1, Layout> TensorBlockMapper;
    DSizes<Index, 1> dims(0);
    for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
      TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count);
      VERIFY_IS_EQUAL(block_mapper.total_block_count(), 0);
      VERIFY(block_mapper.block_dims_total_size() >= 1);
    }
  }

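  // The nested loops below sweep both dimensions over {0, 1, 2}, so every
  // combination containing at least one zero-sized dimension is exercised
  // alongside the non-degenerate ones.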
  {
    typedef internal::TensorBlockMapper<T, Index, 2, Layout> TensorBlockMapper;
    for (int dim1 = 0; dim1 < 3; ++dim1) {
      for (int dim2 = 0; dim2 < 3; ++dim2) {
        DSizes<Index, 2> dims(dim1, dim2);
        for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
          TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count);
          if (dim1 * dim2 == 0) {
            VERIFY_IS_EQUAL(block_mapper.total_block_count(), 0);
          }
          VERIFY(block_mapper.block_dims_total_size() >= 1);
        }
      }
    }
  }
}

#define CALL_SUBTEST_LAYOUTS(NAME) \
  CALL_SUBTEST(NAME<ColMajor>()); \
  CALL_SUBTEST(NAME<RowMajor>())

#define CALL_SUBTEST_LAYOUTS_WITH_ARG(NAME, ARG) \
  CALL_SUBTEST(NAME<ColMajor>(ARG)); \
  CALL_SUBTEST(NAME<RowMajor>(ARG))

EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
  CALL_SUBTEST_LAYOUTS(test_block_mapper_sanity);
  CALL_SUBTEST_LAYOUTS(test_block_mapper_maps_every_element);
  CALL_SUBTEST_LAYOUTS(test_slice_block_mapper_maps_every_element);
  CALL_SUBTEST_LAYOUTS(test_block_io_copy_data_from_source_to_target);
  CALL_SUBTEST_LAYOUTS(test_block_io_copy_using_reordered_dimensions);
  CALL_SUBTEST_LAYOUTS(test_block_io_zero_stride);
  CALL_SUBTEST_LAYOUTS(test_block_io_squeeze_ones);
  CALL_SUBTEST_LAYOUTS(test_block_cwise_binary_io_basic);
  CALL_SUBTEST_LAYOUTS(test_block_cwise_binary_io_squeeze_ones);
  CALL_SUBTEST_LAYOUTS(test_block_cwise_binary_io_zero_strides);
  CALL_SUBTEST_LAYOUTS(test_uniform_block_shape);
  CALL_SUBTEST_LAYOUTS(test_skewed_inner_dim_block_shape);

  CALL_SUBTEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kUniformAllDims);
  CALL_SUBTEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kSkewedInnerDims);
}

#undef CALL_SUBTEST_LAYOUTS
#undef CALL_SUBTEST_LAYOUTS_WITH_ARG