Avoid using the version of TensorIntDiv optimized for 32-bit integers when the divisor can be equal to one, since that version only works for divisors strictly greater than one.

This commit is contained in:
Benoit Steiner 2015-11-18 11:37:58 -08:00
parent 4926251f13
commit 1dd444ea71
2 changed files with 29 additions and 7 deletions

View File

@ -116,7 +116,7 @@ namespace {
} }
template <typename T> template <typename T, bool div_gt_one = false>
struct TensorIntDivisor { struct TensorIntDivisor {
public: public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
@ -166,8 +166,9 @@ struct TensorIntDivisor {
// Optimized version for signed 32 bit integers. // Optimized version for signed 32 bit integers.
// Derived from Hacker's Delight. // Derived from Hacker's Delight.
// Only works for divisors strictly greater than one
template <> template <>
class TensorIntDivisor<int32_t> { class TensorIntDivisor<int32_t, true> {
public: public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
magic = 0; magic = 0;
@ -226,8 +227,8 @@ private:
}; };
template <typename T> template <typename T, bool div_gt_one>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
return divisor.divide(numerator); return divisor.divide(numerator);
} }

View File

@ -14,8 +14,29 @@
void test_signed_32bit() void test_signed_32bit()
{ {
// Divide by one
const Eigen::internal::TensorIntDivisor<int32_t, false> div(1);
for (int32_t j = 0; j < 25000; ++j) {
const int32_t fast_div = j / div;
const int32_t slow_div = j / 1;
VERIFY_IS_EQUAL(fast_div, slow_div);
}
// Standard divide by 2 or more
for (int32_t i = 2; i < 25000; ++i) { for (int32_t i = 2; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<int32_t> div(i); const Eigen::internal::TensorIntDivisor<int32_t, false> div(i);
for (int32_t j = 0; j < 25000; ++j) {
const int32_t fast_div = j / div;
const int32_t slow_div = j / i;
VERIFY_IS_EQUAL(fast_div, slow_div);
}
}
// Optimized divide by 2 or more
for (int32_t i = 2; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<int32_t, true> div(i);
for (int32_t j = 0; j < 25000; ++j) { for (int32_t j = 0; j < 25000; ++j) {
const int32_t fast_div = j / div; const int32_t fast_div = j / div;
@ -42,7 +63,7 @@ void test_unsigned_32bit()
void test_signed_64bit() void test_signed_64bit()
{ {
for (int64_t i = 2; i < 25000; ++i) { for (int64_t i = 1; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<int64_t> div(i); const Eigen::internal::TensorIntDivisor<int64_t> div(i);
for (int64_t j = 0; j < 25000; ++j) { for (int64_t j = 0; j < 25000; ++j) {
@ -56,7 +77,7 @@ void test_signed_64bit()
void test_unsigned_64bit() void test_unsigned_64bit()
{ {
for (uint64_t i = 2; i < 25000; ++i) { for (uint64_t i = 1; i < 25000; ++i) {
const Eigen::internal::TensorIntDivisor<uint64_t> div(i); const Eigen::internal::TensorIntDivisor<uint64_t> div(i);
for (uint64_t j = 0; j < 25000; ++j) { for (uint64_t j = 0; j < 25000; ++j) {