From 2692fb2b712bec285b8d49cdc3f33d5cdd110669 Mon Sep 17 00:00:00 2001
From: Antonio Sanchez
Date: Mon, 22 Jan 2024 13:19:10 -0800
Subject: [PATCH] Fix compile-time error caused by chip static asserts

---
 unsupported/test/cxx11_tensor_executor.cpp | 181 +++++++++++----------
 1 file changed, 95 insertions(+), 86 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_executor.cpp b/unsupported/test/cxx11_tensor_executor.cpp
index ba553f909..309ddeb1b 100644
--- a/unsupported/test/cxx11_tensor_executor.cpp
+++ b/unsupported/test/cxx11_tensor_executor.cpp
@@ -24,7 +24,7 @@ using Eigen::internal::TiledEvaluation;
 // Default assignment that does no use block evaluation or vectorization.
 // We assume that default coefficient evaluation is well tested and correct.
 template <typename Dst, typename Expr>
-static void DefaultAssign(Dst& dst, Expr expr) {
+void DefaultAssign(Dst& dst, Expr expr) {
   using Assign = Eigen::TensorAssignOp<Dst, const Expr>;
   using Executor = Eigen::internal::TensorExecutor<const Assign, DefaultDevice,
                                                    /*Vectorizable=*/false,
@@ -35,7 +35,7 @@ static void DefaultAssign(Dst& dst, Expr expr) {
 
 // Assignment with specified device and tiling strategy.
 template <bool Vectorizable, TiledEvaluation Tiling, typename Device, typename Dst, typename Expr>
-static void DeviceAssign(Device& d, Dst& dst, Expr expr) {
+void DeviceAssign(Device& d, Dst& dst, Expr expr) {
   using Assign = Eigen::TensorAssignOp<Dst, const Expr>;
   using Executor = Eigen::internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
 
@@ -52,7 +52,7 @@ static array<Index, NumDims> RandomDims(int min_dim = 1, int max_dim = 20) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_unary_expr(Device d) {
+void test_execute_unary_expr(Device d) {
   static constexpr int Options = 0 | Layout;
 
   // Pick a large enough tensor size to bypass small tensor block evaluation
@@ -77,7 +77,7 @@ static void test_execute_unary_expr(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_binary_expr(Device d) {
+void test_execute_binary_expr(Device d) {
   static constexpr int Options = 0 | Layout;
 
   // Pick a large enough tensor size to bypass small tensor block evaluation
@@ -105,7 +105,7 @@ static void test_execute_binary_expr(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_broadcasting(Device d) {
+void test_execute_broadcasting(Device d) {
   static constexpr int Options = 0 | Layout;
 
   auto dims = RandomDims<NumDims>(1, 10);
@@ -134,92 +134,101 @@ static void test_execute_broadcasting(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_chipping_rvalue(Device d) {
-  auto dims = RandomDims<NumDims>(1, 10);
-  Tensor<T, NumDims, Layout, Index> src(dims);
-  src.setRandom();
+struct test_execute_chipping_rvalue_runner {
+  template <int ChipDim>
+  static std::enable_if_t<(ChipDim >= 0), void> run_dim(Device& d, const array<Index, NumDims>& dims,
+                                                        const Tensor<T, NumDims, Layout, Index>& src) {
+    const auto offset = internal::random<Index>(0, dims[ChipDim] - 1);
+    const auto expr = src.template chip<ChipDim>(offset);
 
-#define TEST_CHIPPING(CHIP_DIM) \
-  if (NumDims > (CHIP_DIM)) { \
-    const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \
-    const auto expr = src.template chip<(CHIP_DIM)>(offset); \
- \
-    Tensor<T, NumDims - 1, Layout, Index> golden; \
-    golden = expr; \
- \
-    Tensor<T, NumDims - 1, Layout, Index> dst(golden.dimensions()); \
- \
-    using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; \
-    using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>; \
- \
-    Executor::run(Assign(dst, expr), d); \
- \
-    for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { \
-      VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i)); \
-    } \
+    Tensor<T, NumDims - 1, Layout, Index> golden;
+    golden = expr;
+
+    Tensor<T, NumDims - 1, Layout, Index> dst(golden.dimensions());
+
+    using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
+    using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
+
+    Executor::run(Assign(dst, expr), d);
+
+    for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+      VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
+    }
+
+    // Recursively reduce chip dimension.
+    run_dim<ChipDim - 1>(d, dims, src);
   }
-  TEST_CHIPPING(0)
-  TEST_CHIPPING(1)
-  TEST_CHIPPING(2)
-  TEST_CHIPPING(3)
-  TEST_CHIPPING(4)
-  TEST_CHIPPING(5)
+  // Termination of the compile-time recursion over chip dimensions.
+  template <int ChipDim>
+  static std::enable_if_t<(ChipDim < 0), void> run_dim(Device&, const array<Index, NumDims>&,
+                                                       const Tensor<T, NumDims, Layout, Index>&) {}
 
-#undef TEST_CHIPPING
+  static void run(Device d) {
+    auto dims = RandomDims<NumDims>(1, 10);
+    Tensor<T, NumDims, Layout, Index> src(dims);
+    src.setRandom();
+    run_dim<NumDims - 1>(d, dims, src);
+  }
+};
+
+template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
+void test_execute_chipping_rvalue(Device d) {
+  test_execute_chipping_rvalue_runner<T, NumDims, Device, Vectorizable, Tiling, Layout>::run(d);
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_chipping_lvalue(Device d) {
-  auto dims = RandomDims<NumDims>(1, 10);
+struct test_execute_chipping_lvalue_runner {
+  template <int ChipDim>
+  static std::enable_if_t<(ChipDim >= 0), void> run_dim(Device& d, const array<Index, NumDims>& dims) {
+    /* Generate random data that we'll assign to the chipped tensor dim. */
+    array<Index, NumDims - 1> src_dims;
+    for (int i = 0; i < NumDims - 1; ++i) {
+      int dim = i < ChipDim ? i : i + 1;
+      src_dims[i] = dims[dim];
+    }
 
-#define TEST_CHIPPING(CHIP_DIM) \
-  if (NumDims > (CHIP_DIM)) { \
-    /* Generate random data that we'll assign to the chipped tensor dim. */ \
-    array<Index, NumDims - 1> src_dims; \
-    for (int i = 0; i < NumDims - 1; ++i) { \
-      int dim = i < (CHIP_DIM) ? i : i + 1; \
-      src_dims[i] = dims[dim]; \
-    } \
- \
-    Tensor<T, NumDims - 1, Layout, Index> src(src_dims); \
-    src.setRandom(); \
- \
-    const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \
- \
-    Tensor<T, NumDims, Layout, Index> random(dims); \
-    random.setZero(); \
- \
-    Tensor<T, NumDims, Layout, Index> golden(dims); \
-    golden = random; \
-    golden.template chip<(CHIP_DIM)>(offset) = src; \
- \
-    Tensor<T, NumDims, Layout, Index> dst(dims); \
-    dst = random; \
-    auto expr = dst.template chip<(CHIP_DIM)>(offset); \
- \
-    using Assign = TensorAssignOp<decltype(expr), const decltype(src)>; \
-    using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>; \
- \
-    Executor::run(Assign(expr, src), d); \
- \
-    for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) { \
-      VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i)); \
-    } \
+    Tensor<T, NumDims - 1, Layout, Index> src(src_dims);
+    src.setRandom();
+
+    const auto offset = internal::random<Index>(0, dims[ChipDim] - 1);
+
+    Tensor<T, NumDims, Layout, Index> random(dims);
+    random.setZero();
+
+    Tensor<T, NumDims, Layout, Index> golden(dims);
+    golden = random;
+    golden.template chip<ChipDim>(offset) = src;
+
+    Tensor<T, NumDims, Layout, Index> dst(dims);
+    dst = random;
+    auto expr = dst.template chip<ChipDim>(offset);
+
+    using Assign = TensorAssignOp<decltype(expr), const decltype(src)>;
+    using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
+
+    Executor::run(Assign(expr, src), d);
+
+    for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
+      VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
+    }
+
+    run_dim<ChipDim - 1>(d, dims);
   }
-  TEST_CHIPPING(0)
-  TEST_CHIPPING(1)
-  TEST_CHIPPING(2)
-  TEST_CHIPPING(3)
-  TEST_CHIPPING(4)
-  TEST_CHIPPING(5)
+  // Termination of the compile-time recursion over chip dimensions.
+  template <int ChipDim>
+  static std::enable_if_t<(ChipDim < 0), void> run_dim(Device&, const array<Index, NumDims>&) {}
 
-#undef TEST_CHIPPING
+  static void run(Device d) {
+    auto dims = RandomDims<NumDims>(1, 10);
+    run_dim<NumDims - 1>(d, dims);
+  }
+};
+
+template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
+void test_execute_chipping_lvalue(Device d) {
+  test_execute_chipping_lvalue_runner<T, NumDims, Device, Vectorizable, Tiling, Layout>::run(d);
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_shuffle_rvalue(Device d) {
+void test_execute_shuffle_rvalue(Device d) {
   static constexpr int Options = 0 | Layout;
 
   auto dims = RandomDims<NumDims>(1, 10);
@@ -255,7 +264,7 @@ static void test_execute_shuffle_rvalue(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_shuffle_lvalue(Device d) {
+void test_execute_shuffle_lvalue(Device d) {
   static constexpr int Options = 0 | Layout;
 
   auto dims = RandomDims<NumDims>(5, 10);
@@ -289,7 +298,7 @@ static void test_execute_shuffle_lvalue(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_reshape(Device d) {
+void test_execute_reshape(Device d) {
   static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
 
   static constexpr int ReshapedDims = NumDims - 1;
@@ -326,7 +335,7 @@ static void test_execute_reshape(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_slice_rvalue(Device d) {
+void test_execute_slice_rvalue(Device d) {
   static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
 
   static constexpr int Options = 0 | Layout;
@@ -362,7 +371,7 @@ static void test_execute_slice_rvalue(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_slice_lvalue(Device d) {
+void test_execute_slice_lvalue(Device d) {
   static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
 
   static constexpr int Options = 0 | Layout;
@@ -402,7 +411,7 @@ static void test_execute_slice_lvalue(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_broadcasting_of_forced_eval(Device d) {
+void test_execute_broadcasting_of_forced_eval(Device d) {
   static constexpr int Options = 0 | Layout;
 
   auto dims = RandomDims<NumDims>(1, 10);
@@ -442,7 +451,7 @@ struct DummyGenerator {
 };
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_generator_op(Device d) {
+void test_execute_generator_op(Device d) {
   static constexpr int Options = 0 | Layout;
 
   auto dims = RandomDims<NumDims>(20, 30);
@@ -470,7 +479,7 @@ static void test_execute_generator_op(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_execute_reverse_rvalue(Device d) {
+void test_execute_reverse_rvalue(Device d) {
   static constexpr int Options = 0 | Layout;
 
   auto dims = RandomDims<NumDims>(1, numext::pow(1000000.0, 1.0 / NumDims));
@@ -502,7 +511,7 @@ static void test_execute_reverse_rvalue(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_async_execute_unary_expr(Device d) {
+void test_async_execute_unary_expr(Device d) {
   static constexpr int Options = 0 | Layout;
 
   // Pick a large enough tensor size to bypass small tensor block evaluation
@@ -532,7 +541,7 @@ static void test_async_execute_unary_expr(Device d) {
 }
 
 template <typename T, int NumDims, typename Device, bool Vectorizable, TiledEvaluation Tiling, int Layout>
-static void test_async_execute_binary_expr(Device d) {
+void test_async_execute_binary_expr(Device d) {
   static constexpr int Options = 0 | Layout;
 
   // Pick a large enough tensor size to bypass small tensor block evaluation
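
Note on the pattern used by the runner structs above (a minimal standalone sketch, not part of the patch; FakeTensor and ChipAllDims are hypothetical names with no Eigen dependency). The old TEST_CHIPPING macro guarded chip<CHIP_DIM>(...) with a runtime `if (NumDims > CHIP_DIM)`, but the chip expression is still instantiated for every CHIP_DIM, so chip's compile-time dimension check fires regardless of the branch. Recursing over the chip dimension at compile time, with an enable_if-selected terminating overload, only ever instantiates valid dimensions:

#include <cstdio>
#include <type_traits>

// Stand-in for a tensor whose chip dimension is validated at compile time.
template <int N>
struct FakeTensor {
  template <int Dim>
  void chip() const {
    static_assert(Dim >= 0 && Dim < N, "chip dimension out of range");
    std::printf("chip<%d> of a rank-%d tensor\n", Dim, N);
  }
};

// Broken pattern: a runtime guard does not stop template instantiation.
//   if (N > 5) t.template chip<5>();   // static_assert still fires when N == 3
//
// Fixed pattern: compile-time recursion over the dimension, terminated via
// enable_if before an invalid dimension is instantiated.
template <int N>
struct ChipAllDims {
  // Termination: no dimensions left to chip.
  template <int Dim>
  static std::enable_if_t<(Dim < 0), void> run(const FakeTensor<N>&) {}

  template <int Dim>
  static std::enable_if_t<(Dim >= 0), void> run(const FakeTensor<N>& t) {
    t.template chip<Dim>();  // Dim is always a valid dimension here.
    run<Dim - 1>(t);
  }
};

int main() {
  FakeTensor<3> t;
  ChipAllDims<3>::run<3 - 1>(t);  // chips dimensions 2, 1, 0, then stops
  return 0;
}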