Add async evaluation support to TensorPadding/TensorImagePatch/TensorShuffling

Eugene Zhulenev 2019-11-26 11:41:57 -08:00
parent c79b6ffe1f
commit bc66c88255
4 changed files with 35 additions and 2 deletions
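
For orientation, here is a minimal usage sketch (not part of this diff) of the kind of expression these evaluators now support. It assumes Eigen's asynchronous assignment entry point `dst.device(thread_pool_device, done_callback) = expr;` on ThreadPoolDevice, which is the path the new `evalSubExprsIfNeededAsync` overloads below feed into.

// Minimal sketch, assuming the async device(device, done) assignment API.
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include <utility>

int main() {
  Eigen::ThreadPool pool(4);
  Eigen::ThreadPoolDevice device(&pool, /*num_cores=*/4);

  Eigen::Tensor<float, 2> src(30, 40);
  src.setRandom();

  // Pad by one element on each side of both dimensions, then swap the dims.
  Eigen::array<std::pair<Eigen::Index, Eigen::Index>, 2> paddings;
  paddings[0] = {1, 1};
  paddings[1] = {1, 1};
  Eigen::array<int, 2> shuffle_order = {{1, 0}};

  Eigen::Tensor<float, 2> dst(42, 32);  // (40 + 2) x (30 + 2) after the shuffle

  // The done callback fires once the whole expression has been evaluated;
  // a Barrier is used here only so main() can wait for it.
  Eigen::Barrier done(1);
  dst.device(device, [&done]() { done.Notify(); }) =
      src.pad(paddings).shuffle(shuffle_order);
  done.Wait();
  return 0;
}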


@@ -176,7 +176,8 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
         /*dst_base=*/m_buffer + desc.offset(),
         /*dst_strides=*/internal::strides<Layout>(m_impl.dimensions()));
-    ArgTensorBlock block = m_impl.blockV2(desc, scratch);
+    ArgTensorBlock block =
+        m_impl.blockV2(desc, scratch, /*root_of_expr_ast=*/true);
     // If block was evaluated into a destination buffer, there is no need to do
     // an assignment.
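
The extra `/*root_of_expr_ast=*/true` argument tells the block evaluator it sits at the root of the expression tree, so it may materialize its block directly into the destination buffer, in which case the assignment afterwards is skipped. A self-contained toy illustration of that idea (the types and names below are made up for illustration, not Eigen's internals):

#include <algorithm>
#include <iostream>
#include <vector>

struct BlockResult {
  const float* data;         // where the block's values ended up
  bool materialized_in_dst;  // true if `data` already points into dst
};

// Toy block evaluator: when it knows it is the root of the expression AST it
// writes straight into the destination; otherwise it uses scratch storage.
BlockResult EvalBlock(const std::vector<float>& src, std::vector<float>& dst,
                      std::vector<float>& scratch, bool root_of_expr_ast) {
  std::vector<float>& out = root_of_expr_ast ? dst : scratch;
  out.assign(src.begin(), src.end());
  return {out.data(), root_of_expr_ast};
}

int main() {
  std::vector<float> src = {1.f, 2.f, 3.f}, dst(3), scratch;
  BlockResult block = EvalBlock(src, dst, scratch, /*root_of_expr_ast=*/true);
  // If the block was evaluated into the destination buffer, skip the copy.
  if (!block.materialized_in_dst)
    std::copy(block.data, block.data + dst.size(), dst.begin());
  std::cout << dst[0] << " " << dst[1] << " " << dst[2] << "\n";  // 1 2 3
}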


@@ -397,6 +397,14 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
     return true;
   }
 
+#ifdef EIGEN_USE_THREADS
+  template <typename EvalSubExprsCallback>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+      EvaluatorPointerType, EvalSubExprsCallback done) {
+    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+  }
+#endif  // EIGEN_USE_THREADS
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }
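
The three new overloads (here and in the padding and shuffling evaluators below) all follow the same continuation-passing pattern: the evaluator has no asynchronous work of its own, so it forwards to its child evaluator and signals completion from the child's callback. A self-contained sketch of the pattern, with illustrative names rather than Eigen's:

#include <functional>
#include <iostream>

// Toy "evaluator" pair: each node defers to its child and reports completion
// through a done(bool) continuation.
struct LeafEvaluator {
  void evalSubExprsIfNeededAsync(std::function<void(bool)> done) {
    // Pretend some asynchronous preparation finished.
    done(true);
  }
};

struct WrappingEvaluator {
  LeafEvaluator child;
  void evalSubExprsIfNeededAsync(std::function<void(bool)> done) {
    // Nothing to precompute locally: forward to the child, ignore its flag,
    // and report true to the caller, mirroring the pattern in the diff above.
    child.evalSubExprsIfNeededAsync([done](bool) { done(true); });
  }
};

int main() {
  WrappingEvaluator eval;
  eval.evalSubExprsIfNeededAsync(
      [](bool ready) { std::cout << "ready: " << ready << "\n"; });
}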


@@ -155,6 +155,15 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
+
+#ifdef EIGEN_USE_THREADS
+  template <typename EvalSubExprsCallback>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+      EvaluatorPointerType, EvalSubExprsCallback done) {
+    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+  }
+#endif  // EIGEN_USE_THREADS
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }


@@ -182,6 +182,15 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
+
+#ifdef EIGEN_USE_THREADS
+  template <typename EvalSubExprsCallback>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+      EvaluatorPointerType, EvalSubExprsCallback done) {
+    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+  }
+#endif  // EIGEN_USE_THREADS
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }
@@ -237,10 +246,16 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    static const int inner_dim =
+        Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
+    const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
+
     Eigen::Index block_total_size_max = numext::maxi<Eigen::Index>(
         1, m_device.firstLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
-        internal::kUniformAllDims, block_total_size_max));
+        inner_dim_shuffled ? internal::kUniformAllDims
+                           : internal::kSkewedInnerDims,
+        block_total_size_max));
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2