Block evaluation for TensorGenerator and TensorReverse, plus a fix for a bug in the tensor reverse op
parent b03eb63d7c
commit a411e9f344
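For context (not part of the commit): a minimal usage sketch of the public Tensor API expressions whose evaluators change below. Tensor::generate() builds a TensorGeneratorOp and Tensor::reverse() builds a TensorReverseOp; with this commit both evaluators advertise BlockAccessV2 and implement blockV2(), so the expression can be evaluated block by block. The program below is illustrative only and does not appear in the diff.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> input(4, 3);
  input.setRandom();

  // Generator expression: every coefficient is computed from its coordinates.
  auto generator = [](const Eigen::array<Eigen::Index, 2>& coords) -> float {
    return static_cast<float>(coords[0] * 10 + coords[1]);
  };
  Eigen::Tensor<float, 2> generated = input.generate(generator);

  // Reverse expression: flip the first dimension, keep the second.
  Eigen::array<bool, 2> reverse_dims;
  reverse_dims[0] = true;
  reverse_dims[1] = false;
  Eigen::Tensor<float, 2> reversed = input.reverse(reverse_dims);

  std::cout << generated(1, 2) << " " << reversed(0, 0) << "\n";
  return 0;
}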
@@ -131,7 +131,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
       ArgTensorBlock;
 
   typedef internal::TensorBlockAssignment<
-      Scalar, NumDims, typename ArgTensorBlock::XprType, Index>
+      CoeffReturnType, NumDims, typename ArgTensorBlock::XprType, Index>
       TensorBlockAssignment;
   //===--------------------------------------------------------------------===//
 
@@ -94,7 +94,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
     IsAligned = false,
     PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = true,
-    BlockAccessV2 = false,
+    BlockAccessV2 = true,
     PreferBlockAccess = true,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = false, // to be implemented
@@ -107,7 +107,12 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
       TensorBlock;
 
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-  typedef internal::TensorBlockNotImplemented TensorBlockV2;
+  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
+                                                     Layout, Index>
+      TensorBlockV2;
   //===--------------------------------------------------------------------===//
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -232,6 +237,78 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
     }
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
+  blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
+    static const bool is_col_major =
+        static_cast<int>(Layout) == static_cast<int>(ColMajor);
+
+    // Compute spatial coordinates for the first block element.
+    array<Index, NumDims> coords;
+    extract_coordinates(desc.offset(), coords);
+    array<Index, NumDims> initial_coords = coords;
+
+    // Try to reuse destination as an output block buffer.
+    CoeffReturnType* block_buffer =
+        desc.template destination<CoeffReturnType, Layout>();
+    bool materialized_in_output;
+
+    if (block_buffer != NULL) {
+      materialized_in_output = true;
+
+    } else {
+      materialized_in_output = false;
+      void* mem = scratch.allocate(desc.size() * sizeof(CoeffReturnType));
+      block_buffer = static_cast<CoeffReturnType*>(mem);
+    }
+
+    // Offset in the output block buffer.
+    Index offset = 0;
+
+    // Initialize output block iterator state. Dimension in this array are
+    // always in inner_most -> outer_most order (col major layout).
+    array<BlockIteratorState, NumDims> it;
+    for (int i = 0; i < NumDims; ++i) {
+      const int dim = is_col_major ? i : NumDims - 1 - i;
+      it[i].size = desc.dimension(dim);
+      it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride);
+      it[i].span = it[i].stride * (it[i].size - 1);
+      it[i].count = 0;
+    }
+    eigen_assert(it[0].stride == 1);
+
+    while (it[NumDims - 1].count < it[NumDims - 1].size) {
+      // Generate data for the inner-most dimension.
+      for (Index i = 0; i < it[0].size; ++i) {
+        *(block_buffer + offset + i) = m_generator(coords);
+        coords[is_col_major ? 0 : NumDims - 1]++;
+      }
+      coords[is_col_major ? 0 : NumDims - 1] =
+          initial_coords[is_col_major ? 0 : NumDims - 1];
+
+      // For the 1d tensor we need to generate only one inner-most dimension.
+      if (NumDims == 1) break;
+
+      // Update offset.
+      for (Index i = 1; i < NumDims; ++i) {
+        if (++it[i].count < it[i].size) {
+          offset += it[i].stride;
+          coords[is_col_major ? i : NumDims - 1 - i]++;
+          break;
+        }
+        if (i != NumDims - 1) it[i].count = 0;
+        coords[is_col_major ? i : NumDims - 1 - i] =
+            initial_coords[is_col_major ? i : NumDims - 1 - i];
+        offset -= it[i].span;
+      }
+    }
+
+    return TensorBlockV2(
+        materialized_in_output
+            ? internal::TensorBlockKind::kMaterializedInOutput
+            : internal::TensorBlockKind::kMaterializedInScratch,
+        block_buffer, desc.dimensions());
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
   costPerCoeff(bool) const {
     // TODO(rmlarsen): This is just a placeholder. Define interface to make
@@ -116,7 +116,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
     IsAligned = false,
     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
     BlockAccess = true,
-    BlockAccessV2 = false,
+    BlockAccessV2 = NumDims > 0,
     PreferBlockAccess = true,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = false, // to be implemented
@@ -130,7 +130,15 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
       OutputTensorBlock;
 
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-  typedef internal::TensorBlockNotImplemented TensorBlockV2;
+  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
+      ArgTensorBlock;
+
+  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
+                                                     Layout, Index>
+      TensorBlockV2;
   //===--------------------------------------------------------------------===//
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
@@ -240,17 +248,6 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
         internal::kSkewedInnerDims, block_total_size_max));
   }
 
-  struct BlockIteratorState {
-    Index block_size;
-    Index block_stride;
-    Index block_span;
-    Index input_size;
-    Index input_stride;
-    Index input_span;
-    Index count;
-    bool reverse;
-  };
-
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
       OutputTensorBlock* output_block) const {
     if (NumDims <= 0) return;
@@ -278,15 +275,16 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
     array<BlockIteratorState, NumDims> it;
     for (Index i = 0; i < NumDims; ++i) {
       const Index dim = isColMajor ? i : NumDims - 1 - i;
-      it[i].block_size = output_block->block_sizes()[dim];
-      it[i].block_stride = output_block->block_strides()[dim];
-      it[i].block_span = it[i].block_stride * (it[i].block_size - 1);
-      it[i].input_size = m_dimensions[dim];
-      it[i].input_stride = m_strides[dim];
-      it[i].input_span = it[i].input_stride * (it[i].input_size - 1);
+      it[i].size = output_block->block_sizes()[dim];
       it[i].count = 0;
       it[i].reverse = m_reverse[dim];
 
+      it[i].block_stride = output_block->block_strides()[dim];
+      it[i].block_span = it[i].block_stride * (it[i].size - 1);
+
+      it[i].input_stride = m_strides[dim];
+      it[i].input_span = it[i].input_stride * (it[i].size - 1);
+
       if (it[i].reverse) {
         it[i].input_stride = -1 * it[i].input_stride;
         it[i].input_span = -1 * it[i].input_span;
@@ -298,17 +296,16 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
     int effective_inner_dim = 0;
     for (int i = 1; i < NumDims; ++i) {
       if (it[i].reverse != it[effective_inner_dim].reverse) break;
-      if (it[i].block_stride != it[effective_inner_dim].input_size) break;
+      if (it[i].block_stride != it[effective_inner_dim].size) break;
       if (it[i].block_stride != numext::abs(it[i].input_stride)) break;
 
-      it[i].block_size = it[effective_inner_dim].block_size * it[i].block_size;
-      it[i].input_size = it[effective_inner_dim].input_size * it[i].input_size;
+      it[i].size = it[effective_inner_dim].size * it[i].size;
 
       it[i].block_stride = 1;
       it[i].input_stride = (inner_dim_reversed ? -1 : 1);
 
-      it[i].block_span = it[i].block_stride * (it[i].block_size - 1);
-      it[i].input_span = it[i].input_stride * (it[i].input_size - 1);
+      it[i].block_span = it[i].block_stride * (it[i].size - 1);
+      it[i].input_span = it[i].input_stride * (it[i].size - 1);
 
       effective_inner_dim = i;
     }
@@ -317,9 +314,9 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
     eigen_assert(it[effective_inner_dim].input_stride ==
                  (inner_dim_reversed ? -1 : 1));
 
-    const Index inner_dim_size = it[effective_inner_dim].block_size;
+    const Index inner_dim_size = it[effective_inner_dim].size;
 
-    while (it[NumDims - 1].count < it[NumDims - 1].block_size) {
+    while (it[NumDims - 1].count < it[NumDims - 1].size) {
       // Copy inner-most dimension data from reversed location in input.
       Index dst = block_offset;
       Index src = input_offset;
@@ -345,7 +342,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
 
       // Update offset.
      for (Index i = effective_inner_dim + 1; i < NumDims; ++i) {
-        if (++it[i].count < it[i].block_size) {
+        if (++it[i].count < it[i].size) {
          block_offset += it[i].block_stride;
          input_offset += it[i].input_stride;
          break;
@@ -357,6 +354,131 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
     }
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
+  blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
+    // TODO(ezhulenev): If underlying tensor expression supports and prefers
+    // block evaluation we must use it. Currently we use coeff and packet
+    // access into the underlying tensor expression.
+    // static const bool useBlockAccessForArgType =
+    //     TensorEvaluator<ArgType, Device>::BlockAccess &&
+    //     TensorEvaluator<ArgType, Device>::PreferBlockAccess;
+
+    static const bool isColMajor =
+        static_cast<int>(Layout) == static_cast<int>(ColMajor);
+
+    static const Index inner_dim_idx = isColMajor ? 0 : NumDims - 1;
+    const bool inner_dim_reversed = m_reverse[inner_dim_idx];
+
+    // Try to reuse destination as an output block buffer.
+    CoeffReturnType* block_buffer = desc.template destination<CoeffReturnType, Layout>();
+    bool materialized_in_output;
+
+    if (block_buffer != NULL) {
+      materialized_in_output = true;
+
+    } else {
+      materialized_in_output = false;
+      void* mem = scratch.allocate(desc.size() * sizeof(CoeffReturnType));
+      block_buffer = static_cast<CoeffReturnType*>(mem);
+    }
+
+    // Offset in the output block.
+    Index block_offset = 0;
+
+    // Offset in the input Tensor.
+    Index input_offset = reverseIndex(desc.offset());
+
+    // Initialize output block iterator state. Dimension in this array are
+    // always in inner_most -> outer_most order (col major layout).
+    array<BlockIteratorState, NumDims> it;
+    for (int i = 0; i < NumDims; ++i) {
+      const int dim = isColMajor ? i : NumDims - 1 - i;
+      it[i].size = desc.dimension(dim);
+      it[i].count = 0;
+      it[i].reverse = m_reverse[dim];
+
+      it[i].block_stride =
+          i == 0 ? 1 : (it[i - 1].size * it[i - 1].block_stride);
+      it[i].block_span = it[i].block_stride * (it[i].size - 1);
+
+      it[i].input_stride = m_strides[dim];
+      it[i].input_span = it[i].input_stride * (it[i].size - 1);
+
+      if (it[i].reverse) {
+        it[i].input_stride = -1 * it[i].input_stride;
+        it[i].input_span = -1 * it[i].input_span;
+      }
+    }
+
+    // If multiple inner dimensions have the same reverse flag, check if we can
+    // merge them into a single virtual inner dimension.
+    int effective_inner_dim = 0;
+    for (int i = 1; i < NumDims; ++i) {
+      if (it[i].reverse != it[effective_inner_dim].reverse) break;
+      if (it[i].block_stride != it[effective_inner_dim].size) break;
+      if (it[i].block_stride != numext::abs(it[i].input_stride)) break;
+
+      it[i].size = it[effective_inner_dim].size * it[i].size;
+
+      it[i].block_stride = 1;
+      it[i].input_stride = (inner_dim_reversed ? -1 : 1);
+
+      it[i].block_span = it[i].block_stride * (it[i].size - 1);
+      it[i].input_span = it[i].input_stride * (it[i].size - 1);
+
+      effective_inner_dim = i;
+    }
+
+    eigen_assert(it[effective_inner_dim].block_stride == 1);
+    eigen_assert(it[effective_inner_dim].input_stride ==
+                 (inner_dim_reversed ? -1 : 1));
+
+    const Index inner_dim_size = it[effective_inner_dim].size;
+
+    while (it[NumDims - 1].count < it[NumDims - 1].size) {
+      // Copy inner-most dimension data from reversed location in input.
+      Index dst = block_offset;
+      Index src = input_offset;
+
+      // NOTE(ezhulenev): Adding vectorized path with internal::preverse showed
+      // worse results in benchmarks than a simple coefficient loop.
+      if (inner_dim_reversed) {
+        for (Index i = 0; i < inner_dim_size; ++i) {
+          block_buffer[dst] = m_impl.coeff(src);
+          ++dst;
+          --src;
+        }
+      } else {
+        for (Index i = 0; i < inner_dim_size; ++i) {
+          block_buffer[dst] = m_impl.coeff(src);
+          ++dst;
+          ++src;
+        }
+      }
+
+      // For the 1d tensor we need to generate only one inner-most dimension.
+      if ((NumDims - effective_inner_dim) == 1) break;
+
+      // Update offset.
+      for (Index i = effective_inner_dim + 1; i < NumDims; ++i) {
+        if (++it[i].count < it[i].size) {
+          block_offset += it[i].block_stride;
+          input_offset += it[i].input_stride;
+          break;
+        }
+        if (i != NumDims - 1) it[i].count = 0;
+        block_offset -= it[i].block_span;
+        input_offset -= it[i].input_span;
+      }
+    }
+
+    return TensorBlockV2(
+        materialized_in_output
+            ? internal::TensorBlockKind::kMaterializedInOutput
+            : internal::TensorBlockKind::kMaterializedInScratch,
+        block_buffer, desc.dimensions());
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
     double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
                                      2 * TensorOpCost::MulCost<Index>() +
@@ -386,6 +508,26 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
   TensorEvaluator<ArgType, Device> m_impl;
   ReverseDimensions m_reverse;
   const Device EIGEN_DEVICE_REF m_device;
+
+ private:
+  struct BlockIteratorState {
+    BlockIteratorState()
+        : size(0),
+          count(0),
+          reverse(false),
+          block_stride(0),
+          block_span(0),
+          input_stride(0),
+          input_span(0) {}
+
+    Index size;
+    Index count;
+    bool reverse;
+    Index block_stride;
+    Index block_span;
+    Index input_stride;
+    Index input_span;
+  };
 };
 
 // Eval as lvalue
@@ -369,6 +369,48 @@ static void test_eval_tensor_chipping() {
       [&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
 }
 
+template <typename T, int NumDims, int Layout>
+static void test_eval_tensor_generator() {
+  DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
+  Tensor<T, NumDims, Layout> input(dims);
+  input.setRandom();
+
+  auto generator = [](const array<Index, NumDims>& dims) -> T {
+    T result = static_cast<T>(0);
+    for (int i = 0; i < NumDims; ++i) {
+      result += static_cast<T>((i + 1) * dims[i]);
+    }
+    return result;
+  };
+
+  VerifyBlockEvaluator<T, NumDims, Layout>(
+      input.generate(generator),
+      [&dims]() { return FixedSizeBlock(dims); });
+
+  VerifyBlockEvaluator<T, NumDims, Layout>(
+      input.generate(generator),
+      [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
+}
+
+template <typename T, int NumDims, int Layout>
+static void test_eval_tensor_reverse() {
+  DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
+  Tensor<T, NumDims, Layout> input(dims);
+  input.setRandom();
+
+  // Randomly reverse dimensions.
+  Eigen::DSizes<bool, NumDims> reverse;
+  for (int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();
+
+  VerifyBlockEvaluator<T, NumDims, Layout>(
+      input.reverse(reverse),
+      [&dims]() { return FixedSizeBlock(dims); });
+
+  VerifyBlockEvaluator<T, NumDims, Layout>(
+      input.reverse(reverse),
+      [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
+}
+
 template <typename T, int Layout>
 static void test_eval_tensor_reshape_with_bcast() {
   Index dim = internal::random<Index>(1, 100);
@@ -573,6 +615,8 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) {
   CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_select);
   CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_padding);
   CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_chipping);
+  CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_generator);
+  CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_reverse);
 
   CALL_SUBTESTS_LAYOUTS(test_eval_tensor_reshape_with_bcast);
   CALL_SUBTESTS_LAYOUTS(test_eval_tensor_forced_eval);
@@ -539,7 +539,7 @@ static void test_execute_reverse_rvalue(Device d)
 
   // Reverse half of the dimensions.
   Eigen::array<bool, NumDims> reverse;
-  for (int i = 0; i < NumDims; ++i) reverse[i] = (dims[i] % 2 == 0);
+  for (int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();
 
   const auto expr = src.reverse(reverse);
 
@@ -756,16 +756,16 @@ EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
   CALL_SUBTEST_COMBINATIONS_V2(12, test_execute_broadcasting_of_forced_eval, float, 4);
   CALL_SUBTEST_COMBINATIONS_V2(12, test_execute_broadcasting_of_forced_eval, float, 5);
 
-  CALL_SUBTEST_COMBINATIONS_V1(13, test_execute_generator_op, float, 2);
-  CALL_SUBTEST_COMBINATIONS_V1(13, test_execute_generator_op, float, 3);
-  CALL_SUBTEST_COMBINATIONS_V1(13, test_execute_generator_op, float, 4);
-  CALL_SUBTEST_COMBINATIONS_V1(13, test_execute_generator_op, float, 5);
+  CALL_SUBTEST_COMBINATIONS_V2(13, test_execute_generator_op, float, 2);
+  CALL_SUBTEST_COMBINATIONS_V2(13, test_execute_generator_op, float, 3);
+  CALL_SUBTEST_COMBINATIONS_V2(13, test_execute_generator_op, float, 4);
+  CALL_SUBTEST_COMBINATIONS_V2(13, test_execute_generator_op, float, 5);
 
-  CALL_SUBTEST_COMBINATIONS_V1(14, test_execute_reverse_rvalue, float, 1);
-  CALL_SUBTEST_COMBINATIONS_V1(14, test_execute_reverse_rvalue, float, 2);
-  CALL_SUBTEST_COMBINATIONS_V1(14, test_execute_reverse_rvalue, float, 3);
-  CALL_SUBTEST_COMBINATIONS_V1(14, test_execute_reverse_rvalue, float, 4);
-  CALL_SUBTEST_COMBINATIONS_V1(14, test_execute_reverse_rvalue, float, 5);
+  CALL_SUBTEST_COMBINATIONS_V2(14, test_execute_reverse_rvalue, float, 1);
+  CALL_SUBTEST_COMBINATIONS_V2(14, test_execute_reverse_rvalue, float, 2);
+  CALL_SUBTEST_COMBINATIONS_V2(14, test_execute_reverse_rvalue, float, 3);
+  CALL_SUBTEST_COMBINATIONS_V2(14, test_execute_reverse_rvalue, float, 4);
+  CALL_SUBTEST_COMBINATIONS_V2(14, test_execute_reverse_rvalue, float, 5);
 
   CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 3);
   CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 4);