Add async evaluation support to TensorPadding/TensorImagePatch/TensorShuffling

Eugene Zhulenev 2019-11-26 11:41:57 -08:00
parent c79b6ffe1f
commit bc66c88255
4 changed files with 35 additions and 2 deletions
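
For context, a hedged usage sketch of what these changes enable: with EIGEN_USE_THREADS defined, an expression built from a padding (or image patch / shuffle) op can now be assigned asynchronously on a ThreadPoolDevice, with a callback fired on completion. The device(dev, done) overload and Eigen::Barrier are assumed from Eigen's thread-pool async support of the same period; they are not part of this diff.

#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include <utility>

int main() {
  Eigen::ThreadPool pool(4);
  Eigen::ThreadPoolDevice device(&pool, /*num_cores=*/4);

  Eigen::Tensor<float, 2> in(64, 64);
  Eigen::Tensor<float, 2> out(70, 70);
  in.setRandom();

  // Pad 3 elements on each side of both dimensions: 64 + 3 + 3 = 70.
  Eigen::array<std::pair<Eigen::Index, Eigen::Index>, 2> paddings;
  paddings[0] = std::make_pair(3, 3);
  paddings[1] = std::make_pair(3, 3);

  // Asynchronous assignment: before this commit the padding evaluator had
  // no evalSubExprsIfNeededAsync, so an expression like this could not
  // take the async code path.
  Eigen::Barrier done(1);
  out.device(device, [&done]() { done.Notify(); }) = in.pad(paddings);
  done.Wait();
  return 0;
}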


@@ -176,7 +176,8 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
         /*dst_base=*/m_buffer + desc.offset(),
         /*dst_strides=*/internal::strides<Layout>(m_impl.dimensions()));
-    ArgTensorBlock block = m_impl.blockV2(desc, scratch);
+    ArgTensorBlock block =
+        m_impl.blockV2(desc, scratch, /*root_of_expr_ast=*/true);
     // If block was evaluated into a destination buffer, there is no need to do
     // an assignment.
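
The new root_of_expr_ast argument tells the argument evaluator that this call site sits at the root of the expression AST, so the produced block may be materialized directly into the destination buffer described by desc instead of scratch memory, which is what makes the assignment below skippable. A hypothetical mini-model of that contract (all names invented for illustration; not Eigen's real implementation):

#include <cassert>

struct BlockDesc {
  float* destination;  // destination buffer supplied by TensorEvalTo
};

const float* evaluate_block(const BlockDesc& desc, float* scratch,
                            bool root_of_expr_ast = false) {
  // Writing into desc.destination is only safe at the AST root, where no
  // parent expression will read or transform the block afterwards.
  float* out = (root_of_expr_ast && desc.destination != nullptr)
                   ? desc.destination
                   : scratch;
  out[0] = 42.0f;  // stand-in for the real block computation
  return out;
}

int main() {
  float dst[1];
  float tmp[1];
  BlockDesc desc{dst};
  assert(evaluate_block(desc, tmp, /*root_of_expr_ast=*/true) == dst);
  assert(evaluate_block(desc, tmp) == tmp);  // interior node: scratch only
  return 0;
}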


@@ -397,6 +397,14 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
     return true;
   }

+#ifdef EIGEN_USE_THREADS
+  template <typename EvalSubExprsCallback>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+      EvaluatorPointerType, EvalSubExprsCallback done) {
+    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+  }
+#endif  // EIGEN_USE_THREADS
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }
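
This evaluator (like the padding and shuffling evaluators below) owns no buffer of its own, so the async hook simply forwards to the argument evaluator and reports true, meaning the caller still has to perform the assignment. A minimal sketch, with hypothetical names, of how a thread-pool executor might consume this hook:

#include <functional>

// Hypothetical async driver (illustrative only): kick off evaluation of
// nested sub-expressions, and schedule the actual block evaluation only
// once the callback from evalSubExprsIfNeededAsync fires.
template <typename Evaluator>
void RunAsync(Evaluator& eval, std::function<void()> on_done) {
  eval.evalSubExprsIfNeededAsync(nullptr, [&eval, on_done](bool need_assign) {
    if (need_assign) {
      // ... partition the output into blocks and evaluate them on the
      //     thread pool (elided) ...
    }
    eval.cleanup();
    on_done();
  });
}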


@@ -155,6 +155,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
+
+#ifdef EIGEN_USE_THREADS
+  template <typename EvalSubExprsCallback>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+      EvaluatorPointerType, EvalSubExprsCallback done) {
+    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+  }
+#endif  // EIGEN_USE_THREADS
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }


@@ -182,6 +182,15 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
+
+#ifdef EIGEN_USE_THREADS
+  template <typename EvalSubExprsCallback>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+      EvaluatorPointerType, EvalSubExprsCallback done) {
+    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+  }
+#endif  // EIGEN_USE_THREADS
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }
@@ -237,10 +246,16 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    static const int inner_dim =
+        Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
+    const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
+
     Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
         1, m_device.firstLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kUniformAllDims, block_total_size_max));
+        inner_dim_shuffled ? internal::kUniformAllDims
+                           : internal::kSkewedInnerDims,
+        block_total_size_max));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
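
The getResourceRequirements change encodes a locality argument: if the shuffle leaves the innermost dimension in place (dimension 0 for ColMajor, NumDims - 1 for RowMajor), reads along it remain contiguous, so blocks skewed to be long in that dimension are preferred; only a shuffled inner dimension falls back to uniformly sized blocks. A self-contained illustration of the check for a ColMajor rank-3 shuffle:

#include <array>
#include <cassert>

int main() {
  // ColMajor: dimension 0 has stride 1 and is the innermost dimension.
  const int inner_dim = 0;
  // shuffle = {0, 2, 1} swaps the two outer dimensions but keeps
  // dimension 0 in place, so inner-dimension reads remain contiguous.
  const std::array<int, 3> shuffle = {0, 2, 1};
  const bool inner_dim_shuffled = shuffle[inner_dim] != inner_dim;
  // Preserved inner dimension -> request internal::kSkewedInnerDims; a
  // shuffle such as {2, 0, 1} would move it and use kUniformAllDims.
  assert(!inner_dim_shuffled);
  return 0;
}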