mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Add block evaluation to TensorEvalTo and fix a few small bugs
This commit is contained in:
parent
3afb640b56
commit
f74ab8cb8d
@ -111,22 +111,28 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
|
|||||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
BlockAccess = true,
|
BlockAccess = true,
|
||||||
BlockAccessV2 = false,
|
BlockAccessV2 = true,
|
||||||
PreferBlockAccess = false,
|
PreferBlockAccess = false,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
CoordAccess = false, // to be implemented
|
CoordAccess = false, // to be implemented
|
||||||
RawAccess = true
|
RawAccess = true
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename internal::TensorBlock<
|
static const int NumDims = internal::traits<ArgType>::NumDimensions;
|
||||||
CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
|
|
||||||
TensorBlock;
|
typedef typename internal::TensorBlock<CoeffReturnType, Index, NumDims, Layout> TensorBlock;
|
||||||
typedef typename internal::TensorBlockReader<
|
typedef typename internal::TensorBlockReader<CoeffReturnType, Index, NumDims, Layout> TensorBlockReader;
|
||||||
CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
|
|
||||||
TensorBlockReader;
|
|
||||||
|
|
||||||
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
|
||||||
typedef internal::TensorBlockNotImplemented TensorBlockV2;
|
typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
|
||||||
|
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
|
||||||
|
|
||||||
|
typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
|
||||||
|
ArgTensorBlock;
|
||||||
|
|
||||||
|
typedef internal::TensorBlockAssignment<
|
||||||
|
Scalar, NumDims, typename ArgTensorBlock::XprType, Index>
|
||||||
|
TensorBlockAssignment;
|
||||||
//===--------------------------------------------------------------------===//
|
//===--------------------------------------------------------------------===//
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||||
@ -164,6 +170,30 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
|
|||||||
m_impl.block(&eval_to_block);
|
m_impl.block(&eval_to_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2(
|
||||||
|
TensorBlockDesc& desc, TensorBlockScratch& scratch) {
|
||||||
|
// Add `m_buffer` as destination buffer to the block descriptor.
|
||||||
|
desc.AddDestinationBuffer(
|
||||||
|
/*dst_base=*/m_buffer + desc.offset(),
|
||||||
|
/*dst_strides=*/internal::strides<Layout>(m_impl.dimensions()),
|
||||||
|
/*total_dst_bytes=*/
|
||||||
|
(internal::array_prod(m_impl.dimensions())
|
||||||
|
* sizeof(Scalar)));
|
||||||
|
|
||||||
|
ArgTensorBlock block = m_impl.blockV2(desc, scratch);
|
||||||
|
|
||||||
|
// If block was evaluated into a destination buffer, there is no need to do
|
||||||
|
// an assignment.
|
||||||
|
if (block.kind() != internal::TensorBlockKind::kMaterializedInOutput) {
|
||||||
|
TensorBlockAssignment::Run(
|
||||||
|
TensorBlockAssignment::target(
|
||||||
|
desc.dimensions(), internal::strides<Layout>(m_impl.dimensions()),
|
||||||
|
m_buffer, desc.offset()),
|
||||||
|
block.expr());
|
||||||
|
}
|
||||||
|
block.cleanup();
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||||
m_impl.cleanup();
|
m_impl.cleanup();
|
||||||
}
|
}
|
||||||
|
@ -238,7 +238,8 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
|
|||||||
typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
|
typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
|
||||||
typedef typename TensorBlock::Dimensions TensorBlockDimensions;
|
typedef typename TensorBlock::Dimensions TensorBlockDimensions;
|
||||||
|
|
||||||
typedef internal::TensorBlockDescriptor<NumDims> TensorBlockDesc;
|
typedef internal::TensorBlockDescriptor<NumDims, StorageIndex>
|
||||||
|
TensorBlockDesc;
|
||||||
typedef internal::TensorBlockScratchAllocator<DefaultDevice>
|
typedef internal::TensorBlockScratchAllocator<DefaultDevice>
|
||||||
TensorBlockScratch;
|
TensorBlockScratch;
|
||||||
|
|
||||||
|
@ -231,7 +231,11 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
|
||||||
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
|
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
|
||||||
eigen_assert(m_impl.data() != NULL);
|
// If one of the dimensions is zero, return empty block view.
|
||||||
|
if (desc.size() == 0) {
|
||||||
|
return TensorBlockV2(internal::TensorBlockKind::kView, NULL,
|
||||||
|
desc.dimensions());
|
||||||
|
}
|
||||||
|
|
||||||
// Check if we can reuse `desc` destination, or allocate new scratch buffer.
|
// Check if we can reuse `desc` destination, or allocate new scratch buffer.
|
||||||
ScalarNoConst* materialized_output =
|
ScalarNoConst* materialized_output =
|
||||||
@ -385,6 +389,8 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
|||||||
const Index out = output_offset + output_inner_pad_before_size;
|
const Index out = output_offset + output_inner_pad_before_size;
|
||||||
const Index in = input_offset + output_inner_pad_before_size;
|
const Index in = input_offset + output_inner_pad_before_size;
|
||||||
|
|
||||||
|
eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);
|
||||||
|
|
||||||
LinCopy::template Run<LinCopy::Kind::Linear>(
|
LinCopy::template Run<LinCopy::Kind::Linear>(
|
||||||
typename LinCopy::Dst(out, 1, materialized_output),
|
typename LinCopy::Dst(out, 1, materialized_output),
|
||||||
typename LinCopy::Src(in, 1, m_impl.data()),
|
typename LinCopy::Src(in, 1, m_impl.data()),
|
||||||
|
@ -131,6 +131,7 @@ static void VerifyBlockEvaluator(Expression expr, GenBlockParams gen_block) {
|
|||||||
|
|
||||||
// TensorEvaluator is needed to produce tensor blocks of the expression.
|
// TensorEvaluator is needed to produce tensor blocks of the expression.
|
||||||
auto eval = TensorEvaluator<const decltype(expr), Device>(expr, d);
|
auto eval = TensorEvaluator<const decltype(expr), Device>(expr, d);
|
||||||
|
eval.evalSubExprsIfNeeded(nullptr);
|
||||||
|
|
||||||
// Choose random offsets, sizes and TensorBlockDescriptor.
|
// Choose random offsets, sizes and TensorBlockDescriptor.
|
||||||
TensorBlockParams<NumDims> block_params = gen_block();
|
TensorBlockParams<NumDims> block_params = gen_block();
|
||||||
@ -266,29 +267,6 @@ static void test_eval_tensor_reshape() {
|
|||||||
[&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); });
|
[&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); });
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, int Layout>
|
|
||||||
static void test_eval_tensor_reshape_with_bcast() {
|
|
||||||
Index dim = internal::random<Index>(1, 100);
|
|
||||||
|
|
||||||
Tensor<T, 2, Layout> lhs(1, dim);
|
|
||||||
Tensor<T, 2, Layout> rhs(dim, 1);
|
|
||||||
lhs.setRandom();
|
|
||||||
rhs.setRandom();
|
|
||||||
|
|
||||||
auto reshapeLhs = NByOne(dim);
|
|
||||||
auto reshapeRhs = OneByM(dim);
|
|
||||||
|
|
||||||
auto bcastLhs = OneByM(dim);
|
|
||||||
auto bcastRhs = NByOne(dim);
|
|
||||||
|
|
||||||
DSizes<Index, 2> dims(dim, dim);
|
|
||||||
|
|
||||||
VerifyBlockEvaluator<T, 2, Layout>(
|
|
||||||
lhs.reshape(reshapeLhs).broadcast(bcastLhs) +
|
|
||||||
rhs.reshape(reshapeRhs).broadcast(bcastRhs),
|
|
||||||
[dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, int NumDims, int Layout>
|
template <typename T, int NumDims, int Layout>
|
||||||
static void test_eval_tensor_cast() {
|
static void test_eval_tensor_cast() {
|
||||||
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
|
DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
|
||||||
@ -355,6 +333,52 @@ static void test_eval_tensor_padding() {
|
|||||||
[&padded_dims]() { return SkewedInnerBlock<Layout>(padded_dims); });
|
[&padded_dims]() { return SkewedInnerBlock<Layout>(padded_dims); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T, int Layout>
|
||||||
|
static void test_eval_tensor_reshape_with_bcast() {
|
||||||
|
Index dim = internal::random<Index>(1, 100);
|
||||||
|
|
||||||
|
Tensor<T, 2, Layout> lhs(1, dim);
|
||||||
|
Tensor<T, 2, Layout> rhs(dim, 1);
|
||||||
|
lhs.setRandom();
|
||||||
|
rhs.setRandom();
|
||||||
|
|
||||||
|
auto reshapeLhs = NByOne(dim);
|
||||||
|
auto reshapeRhs = OneByM(dim);
|
||||||
|
|
||||||
|
auto bcastLhs = OneByM(dim);
|
||||||
|
auto bcastRhs = NByOne(dim);
|
||||||
|
|
||||||
|
DSizes<Index, 2> dims(dim, dim);
|
||||||
|
|
||||||
|
VerifyBlockEvaluator<T, 2, Layout>(
|
||||||
|
lhs.reshape(reshapeLhs).broadcast(bcastLhs) +
|
||||||
|
rhs.reshape(reshapeRhs).broadcast(bcastRhs),
|
||||||
|
[dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, int Layout>
|
||||||
|
static void test_eval_tensor_forced_eval() {
|
||||||
|
Index dim = internal::random<Index>(1, 100);
|
||||||
|
|
||||||
|
Tensor<T, 2, Layout> lhs(dim, 1);
|
||||||
|
Tensor<T, 2, Layout> rhs(1, dim);
|
||||||
|
lhs.setRandom();
|
||||||
|
rhs.setRandom();
|
||||||
|
|
||||||
|
auto bcastLhs = OneByM(dim);
|
||||||
|
auto bcastRhs = NByOne(dim);
|
||||||
|
|
||||||
|
DSizes<Index, 2> dims(dim, dim);
|
||||||
|
|
||||||
|
VerifyBlockEvaluator<T, 2, Layout>(
|
||||||
|
(lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims),
|
||||||
|
[dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
|
||||||
|
|
||||||
|
VerifyBlockEvaluator<T, 2, Layout>(
|
||||||
|
(lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims),
|
||||||
|
[dims]() { return RandomBlock<Layout, 2>(dims, 1, 50); });
|
||||||
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------- //
|
// -------------------------------------------------------------------------- //
|
||||||
// Verify that assigning block to a Tensor expression produces the same result
|
// Verify that assigning block to a Tensor expression produces the same result
|
||||||
// as an assignment to TensorSliceOp (writing a block is identical to
|
// as an assignment to TensorSliceOp (writing a block is identical to
|
||||||
@ -482,6 +506,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) {
|
|||||||
CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_padding);
|
CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_padding);
|
||||||
|
|
||||||
CALL_SUBTESTS_LAYOUTS(test_eval_tensor_reshape_with_bcast);
|
CALL_SUBTESTS_LAYOUTS(test_eval_tensor_reshape_with_bcast);
|
||||||
|
CALL_SUBTESTS_LAYOUTS(test_eval_tensor_forced_eval);
|
||||||
|
|
||||||
CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor);
|
CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor);
|
||||||
CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor_reshape);
|
CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor_reshape);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user