diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index bf7522682..d1e4c82d2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -111,22 +111,28 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
     BlockAccess = true,
-    BlockAccessV2 = false,
+    BlockAccessV2 = true,
     PreferBlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = false,  // to be implemented
     RawAccess = true
   };
 
-  typedef typename internal::TensorBlock<
-      CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
-      TensorBlock;
-  typedef typename internal::TensorBlockReader<
-      CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
-      TensorBlockReader;
+  static const int NumDims = internal::traits<ArgType>::NumDimensions;
+
+  typedef typename internal::TensorBlock<CoeffReturnType, Index, NumDims, Layout> TensorBlock;
+  typedef typename internal::TensorBlockReader<CoeffReturnType, Index, NumDims, Layout> TensorBlockReader;
 
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-  typedef internal::TensorBlockNotImplemented TensorBlockV2;
+  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
+      ArgTensorBlock;
+
+  typedef internal::TensorBlockAssignment<
+      Scalar, NumDims, typename ArgTensorBlock::XprType, Index>
+      TensorBlockAssignment;
   //===--------------------------------------------------------------------===//
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -164,6 +170,30 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
     m_impl.block(&eval_to_block);
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2(
+      TensorBlockDesc& desc, TensorBlockScratch& scratch) {
+    // Add `m_buffer` as destination buffer to the block descriptor.
+    desc.AddDestinationBuffer(
+        /*dst_base=*/m_buffer + desc.offset(),
+        /*dst_strides=*/internal::strides<Layout>(m_impl.dimensions()),
+        /*total_dst_bytes=*/
+        (internal::array_prod(m_impl.dimensions())
+         * sizeof(Scalar)));
+
+    ArgTensorBlock block = m_impl.blockV2(desc, scratch);
+
+    // If block was evaluated into a destination buffer, there is no need to do
+    // an assignment.
+    if (block.kind() != internal::TensorBlockKind::kMaterializedInOutput) {
+      TensorBlockAssignment::Run(
+          TensorBlockAssignment::target(
+              desc.dimensions(), internal::strides<Layout>(m_impl.dimensions()),
+              m_buffer, desc.offset()),
+          block.expr());
+    }
+    block.cleanup();
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index a7cb8dc97..97ac96db1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -238,7 +238,8 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
       TensorBlockMapper;
   typedef typename TensorBlock::Dimensions TensorBlockDimensions;
 
-  typedef internal::TensorBlockDescriptor<NumDims, StorageIndex> TensorBlockDesc;
+  typedef internal::TensorBlockDescriptor<NumDims, StorageIndex>
+      TensorBlockDesc;
   typedef internal::TensorBlockScratchAllocator<DefaultDevice>
       TensorBlockScratch;
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index 489b915ac..f3907be6e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -231,7 +231,11 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
   blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
-    eigen_assert(m_impl.data() != NULL);
+    // If one of the dimensions is zero, return empty block view.
+    if (desc.size() == 0) {
+      return TensorBlockV2(internal::TensorBlockKind::kView, NULL,
+                           desc.dimensions());
+    }
 
     // Check if we can reuse `desc` destination, or allocate new scratch buffer.
     ScalarNoConst* materialized_output =
@@ -385,6 +389,8 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
       const Index out = output_offset + output_inner_pad_before_size;
       const Index in = input_offset + output_inner_pad_before_size;
 
+      eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);
+
       LinCopy::template Run<LinCopy::Kind::Linear>(
           typename LinCopy::Dst(out, 1, materialized_output),
           typename LinCopy::Src(in, 1, m_impl.data()),
diff --git a/unsupported/test/cxx11_tensor_block_eval.cpp b/unsupported/test/cxx11_tensor_block_eval.cpp
index 75252362c..1dc0a9e2c 100644
--- a/unsupported/test/cxx11_tensor_block_eval.cpp
+++ b/unsupported/test/cxx11_tensor_block_eval.cpp
@@ -131,6 +131,7 @@ static void VerifyBlockEvaluator(Expression expr, GenBlockParams gen_block) {
 
   // TensorEvaluator is needed to produce tensor blocks of the expression.
   auto eval = TensorEvaluator<const decltype(expr), Device>(expr, d);
+  eval.evalSubExprsIfNeeded(nullptr);
 
   // Choose a random offsets, sizes and TensorBlockDescriptor.
   TensorBlockParams<NumDims> block_params = gen_block();
@@ -266,29 +267,6 @@ static void test_eval_tensor_reshape() {
       [&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); });
 }
 
-template <typename T, int Layout>
-static void test_eval_tensor_reshape_with_bcast() {
-  Index dim = internal::random<Index>(1, 100);
-
-  Tensor<T, 2, Layout> lhs(1, dim);
-  Tensor<T, 2, Layout> rhs(dim, 1);
-  lhs.setRandom();
-  rhs.setRandom();
-
-  auto reshapeLhs = NByOne(dim);
-  auto reshapeRhs = OneByM(dim);
-
-  auto bcastLhs = OneByM(dim);
-  auto bcastRhs = NByOne(dim);
-
-  DSizes<Index, 2> dims(dim, dim);
-
-  VerifyBlockEvaluator<T, 2, Layout>(
-      lhs.reshape(reshapeLhs).broadcast(bcastLhs) +
-          rhs.reshape(reshapeRhs).broadcast(bcastRhs),
-      [dims]() { return SkewedInnerBlock<Layout>(dims); });
-}
-
 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_cast() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
@@ -355,6 +333,52 @@ static void test_eval_tensor_padding() {
       [&padded_dims]() { return SkewedInnerBlock<Layout>(padded_dims); });
 }
 
+template <typename T, int Layout>
+static void test_eval_tensor_reshape_with_bcast() {
+  Index dim = internal::random<Index>(1, 100);
+
+  Tensor<T, 2, Layout> lhs(1, dim);
+  Tensor<T, 2, Layout> rhs(dim, 1);
+  lhs.setRandom();
+  rhs.setRandom();
+
+  auto reshapeLhs = NByOne(dim);
+  auto reshapeRhs = OneByM(dim);
+
+  auto bcastLhs = OneByM(dim);
+  auto bcastRhs = NByOne(dim);
+
+  DSizes<Index, 2> dims(dim, dim);
+
+  VerifyBlockEvaluator<T, 2, Layout>(
+      lhs.reshape(reshapeLhs).broadcast(bcastLhs) +
+          rhs.reshape(reshapeRhs).broadcast(bcastRhs),
+      [dims]() { return SkewedInnerBlock<Layout>(dims); });
+}
+
+template <typename T, int Layout>
+static void test_eval_tensor_forced_eval() {
+  Index dim = internal::random<Index>(1, 100);
+
+  Tensor<T, 2, Layout> lhs(dim, 1);
+  Tensor<T, 2, Layout> rhs(1, dim);
+  lhs.setRandom();
+  rhs.setRandom();
+
+  auto bcastLhs = OneByM(dim);
+  auto bcastRhs = NByOne(dim);
+
+  DSizes<Index, 2> dims(dim, dim);
+
+  VerifyBlockEvaluator<T, 2, Layout>(
+      (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims),
+      [dims]() { return SkewedInnerBlock<Layout>(dims); });
+
+  VerifyBlockEvaluator<T, 2, Layout>(
+      (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims),
+      [dims]() { return RandomBlock<Layout>(dims, 1, 50); });
+}
+
 // -------------------------------------------------------------------------- //
 // Verify that assigning block to a Tensor expression produces the same result
 // as an assignment to TensorSliceOp (writing a block is is identical to
@@ -482,6 +506,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) {
   CALL_SUBTESTS_DIMS_LAYOUTS(test_eval_tensor_padding);
 
   CALL_SUBTESTS_LAYOUTS(test_eval_tensor_reshape_with_bcast);
+  CALL_SUBTESTS_LAYOUTS(test_eval_tensor_forced_eval);
 
   CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor);
   CALL_SUBTESTS_DIMS_LAYOUTS(test_assign_to_tensor_reshape);
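
Note (not part of the patch): at the user level, the new TensorEvalTo block path is reached through forced evaluation. `.eval()` wraps an expression in a TensorForcedEvalOp, whose evaluator materializes the argument through a TensorEvalToOp into a temporary buffer. With `BlockAccessV2 = true`, the argument expression can produce V2 blocks directly into that buffer (`kMaterializedInOutput`), and the extra TensorBlockAssignment is skipped. The following is a minimal standalone sketch of such an expression, mirroring the new `test_eval_tensor_forced_eval` test above; the concrete size 70 is an arbitrary example value, not something from the patch.

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 2> lhs(70, 1);
      Eigen::Tensor<float, 2> rhs(1, 70);
      lhs.setRandom();
      rhs.setRandom();

      // Broadcast both operands to (70, 70), matching NByOne/OneByM in the test.
      Eigen::array<Eigen::Index, 2> bcastLhs = {{1, 70}};
      Eigen::array<Eigen::Index, 2> bcastRhs = {{70, 1}};
      Eigen::array<Eigen::Index, 2> dims = {{70, 70}};

      // .eval() forces the broadcast sum into a temporary buffer (internally a
      // TensorEvalToOp); the surrounding reshape then reads that buffer back
      // block by block when a block-based executor is used.
      Eigen::Tensor<float, 2> result =
          (lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims);

      return result.size() == 70 * 70 ? 0 : 1;
    }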