Explicitly construct tensor block dimensions from evaluator dimensions

2025-09-18 12:23:13 +08:00 · 2018-09-14 16:55:05 -07:00 · 2018-09-14 16:55:05 -07:00 · 4863375723
commit 4863375723
parent 281e631839
2 changed files with 5 additions and 4 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -282,8 +282,9 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
      TensorOpCost cost = evaluator.costPerCoeff(Vectorizable);
      double taskSize = TensorCostModel<ThreadPoolDevice>::taskSize(1, cost);
      size_t block_size = static_cast<size_t>(1.0 / taskSize);
-      TensorBlockMapper block_mapper(evaluator.dimensions(), block_shape,
+      TensorBlockMapper block_mapper(
-                                     block_size);
+          typename TensorBlockMapper::Dimensions(evaluator.dimensions()),
+          block_shape, block_size);
      block_size = block_mapper.block_dims_total_size();
      const size_t aligned_blocksize =
          EIGEN_MAX_ALIGN_BYTES *
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -976,7 +976,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
        // find that scattered reads are not worth supporting in
        // TensorSliceBlockMapper.
        TensorSliceBlockMapper block_mapper(
-            input_tensor_dims, tensor_slice_offsets, tensor_slice_extents,
+            typename TensorSliceBlockMapper::Dimensions(input_tensor_dims),
+            tensor_slice_offsets, tensor_slice_extents,
            target_input_block_sizes, DimensionList<Index, NumInputDims>());
        const Index num_outputs_to_update =
@@ -1232,7 +1233,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
      } else if (!first_preserved_dim_allocated) {
        // TODO(andydavis) Include output block size in this L1 working set
        // calculation.
-        const Index allocated = max_coeff_count - coeff_to_allocate;
        const Index alloc_size = numext::maxi(
            static_cast<Index>(1), coeff_to_allocate / reducer_overhead);
        (*target_input_block_sizes)[dim] =