diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index fd2441894..058fb2c42 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -116,7 +116,7 @@ namespace { } -template +template struct TensorIntDivisor { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { @@ -166,8 +166,9 @@ struct TensorIntDivisor { // Optimized version for signed 32 bit integers. // Derived from Hacker's Delight. +// Only works for divisors strictly greater than one template <> -class TensorIntDivisor { +class TensorIntDivisor { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { magic = 0; @@ -226,8 +227,8 @@ private: }; -template -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor& divisor) { +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor& divisor) { return divisor.divide(numerator); } diff --git a/unsupported/test/cxx11_tensor_intdiv.cpp b/unsupported/test/cxx11_tensor_intdiv.cpp index 343b37dbd..fd6d27ae1 100644 --- a/unsupported/test/cxx11_tensor_intdiv.cpp +++ b/unsupported/test/cxx11_tensor_intdiv.cpp @@ -14,8 +14,29 @@ void test_signed_32bit() { + // Divide by one + const Eigen::internal::TensorIntDivisor div(1); + + for (int32_t j = 0; j < 25000; ++j) { + const int32_t fast_div = j / div; + const int32_t slow_div = j / 1; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + + // Standard divide by 2 or more for (int32_t i = 2; i < 25000; ++i) { - const Eigen::internal::TensorIntDivisor div(i); + const Eigen::internal::TensorIntDivisor div(i); + + for (int32_t j = 0; j < 25000; ++j) { + const int32_t fast_div = j / div; + const int32_t slow_div = j / i; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + } + + // Optimized divide by 2 or more + for (int32_t i = 2; i < 25000; ++i) { + const Eigen::internal::TensorIntDivisor div(i); for (int32_t j = 0; j < 25000; ++j) { const int32_t fast_div = j / div; @@ -42,7 +63,7 @@ void test_unsigned_32bit() void test_signed_64bit() { - for (int64_t i = 2; i < 25000; ++i) { + for (int64_t i = 1; i < 25000; ++i) { const Eigen::internal::TensorIntDivisor div(i); for (int64_t j = 0; j < 25000; ++j) { @@ -56,7 +77,7 @@ void test_signed_64bit() void test_unsigned_64bit() { - for (uint64_t i = 2; i < 25000; ++i) { + for (uint64_t i = 1; i < 25000; ++i) { const Eigen::internal::TensorIntDivisor div(i); for (uint64_t j = 0; j < 25000; ++j) {