Avoid using the specialization of TensorIntDivisor optimized for signed 32-bit integers when the divisor can be equal to one, since that specialization only works for divisors strictly greater than one.

2025-07-24 13:54:28 +08:00 · 2015-11-18 11:37:58 -08:00 · 2015-11-18 11:37:58 -08:00 · 1dd444ea71
commit 1dd444ea71
parent 4926251f13
2 changed files with 29 additions and 7 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@ -116,7 +116,7 @@ namespace {
 }


-template <typename T>
+template <typename T, bool div_gt_one = false>
 struct TensorIntDivisor {
 public:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
@ -166,8 +166,9 @@ struct TensorIntDivisor {

 // Optimized version for signed 32 bit integers.
 // Derived from Hacker's Delight.
+// Only works for divisors strictly greater than one
 template <>
-class TensorIntDivisor<int32_t> {
+class TensorIntDivisor<int32_t, true> {
 public:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
    magic = 0;
@ -226,8 +227,8 @@ private:
 };


-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) {
+template <typename T, bool div_gt_one>
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
  return divisor.divide(numerator);
 }

--- a/unsupported/test/cxx11_tensor_intdiv.cpp
+++ b/unsupported/test/cxx11_tensor_intdiv.cpp
@ -14,8 +14,29 @@

 void test_signed_32bit()
 {
+  // Divide by one
+  const Eigen::internal::TensorIntDivisor<int32_t, false> div(1);
+
+  for (int32_t j = 0; j < 25000; ++j) {
+    const int32_t fast_div = j / div;
+    const int32_t slow_div = j / 1;
+    VERIFY_IS_EQUAL(fast_div, slow_div);
+  }
+
+  // Standard divide by 2 or more
  for (int32_t i = 2; i < 25000; ++i) {
-    const Eigen::internal::TensorIntDivisor<int32_t> div(i);
+    const Eigen::internal::TensorIntDivisor<int32_t, false> div(i);
+
+    for (int32_t j = 0; j < 25000; ++j) {
+      const int32_t fast_div = j / div;
+      const int32_t slow_div = j / i;
+      VERIFY_IS_EQUAL(fast_div, slow_div);
+    }
+  }
+
+  // Optimized divide by 2 or more
+  for (int32_t i = 2; i < 25000; ++i) {
+    const Eigen::internal::TensorIntDivisor<int32_t, true> div(i);

    for (int32_t j = 0; j < 25000; ++j) {
      const int32_t fast_div = j / div;
@ -42,7 +63,7 @@ void test_unsigned_32bit()

 void test_signed_64bit()
 {
-  for (int64_t i = 2; i < 25000; ++i) {
+  for (int64_t i = 1; i < 25000; ++i) {
    const Eigen::internal::TensorIntDivisor<int64_t> div(i);

    for (int64_t j = 0; j < 25000; ++j) {
@ -56,7 +77,7 @@ void test_signed_64bit()

 void test_unsigned_64bit()
 {
-  for (uint64_t i = 2; i < 25000; ++i) {
+  for (uint64_t i = 1; i < 25000; ++i) {
    const Eigen::internal::TensorIntDivisor<uint64_t> div(i);

    for (uint64_t j = 0; j < 25000; ++j) {