mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Avoid using the version of TensorIntDivisor optimized for 32-bit integers when the divisor can be equal to one, since that version only supports divisors strictly greater than one.
This commit is contained in:
parent
4926251f13
commit
1dd444ea71
@ -116,7 +116,7 @@ namespace {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T, bool div_gt_one = false>
|
||||||
struct TensorIntDivisor {
|
struct TensorIntDivisor {
|
||||||
public:
|
public:
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
|
||||||
@ -166,8 +166,9 @@ struct TensorIntDivisor {
|
|||||||
|
|
||||||
// Optimized version for signed 32 bit integers.
|
// Optimized version for signed 32 bit integers.
|
||||||
// Derived from Hacker's Delight.
|
// Derived from Hacker's Delight.
|
||||||
|
// Only works for divisors strictly greater than one
|
||||||
template <>
|
template <>
|
||||||
class TensorIntDivisor<int32_t> {
|
class TensorIntDivisor<int32_t, true> {
|
||||||
public:
|
public:
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
|
||||||
magic = 0;
|
magic = 0;
|
||||||
@ -226,8 +227,8 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T, bool div_gt_one>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
|
||||||
return divisor.divide(numerator);
|
return divisor.divide(numerator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14,8 +14,29 @@
|
|||||||
|
|
||||||
void test_signed_32bit()
|
void test_signed_32bit()
|
||||||
{
|
{
|
||||||
|
// Divide by one
|
||||||
|
const Eigen::internal::TensorIntDivisor<int32_t, false> div(1);
|
||||||
|
|
||||||
|
for (int32_t j = 0; j < 25000; ++j) {
|
||||||
|
const int32_t fast_div = j / div;
|
||||||
|
const int32_t slow_div = j / 1;
|
||||||
|
VERIFY_IS_EQUAL(fast_div, slow_div);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Standard divide by 2 or more
|
||||||
for (int32_t i = 2; i < 25000; ++i) {
|
for (int32_t i = 2; i < 25000; ++i) {
|
||||||
const Eigen::internal::TensorIntDivisor<int32_t> div(i);
|
const Eigen::internal::TensorIntDivisor<int32_t, false> div(i);
|
||||||
|
|
||||||
|
for (int32_t j = 0; j < 25000; ++j) {
|
||||||
|
const int32_t fast_div = j / div;
|
||||||
|
const int32_t slow_div = j / i;
|
||||||
|
VERIFY_IS_EQUAL(fast_div, slow_div);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Optimized divide by 2 or more
|
||||||
|
for (int32_t i = 2; i < 25000; ++i) {
|
||||||
|
const Eigen::internal::TensorIntDivisor<int32_t, true> div(i);
|
||||||
|
|
||||||
for (int32_t j = 0; j < 25000; ++j) {
|
for (int32_t j = 0; j < 25000; ++j) {
|
||||||
const int32_t fast_div = j / div;
|
const int32_t fast_div = j / div;
|
||||||
@ -42,7 +63,7 @@ void test_unsigned_32bit()
|
|||||||
|
|
||||||
void test_signed_64bit()
|
void test_signed_64bit()
|
||||||
{
|
{
|
||||||
for (int64_t i = 2; i < 25000; ++i) {
|
for (int64_t i = 1; i < 25000; ++i) {
|
||||||
const Eigen::internal::TensorIntDivisor<int64_t> div(i);
|
const Eigen::internal::TensorIntDivisor<int64_t> div(i);
|
||||||
|
|
||||||
for (int64_t j = 0; j < 25000; ++j) {
|
for (int64_t j = 0; j < 25000; ++j) {
|
||||||
@ -56,7 +77,7 @@ void test_signed_64bit()
|
|||||||
|
|
||||||
void test_unsigned_64bit()
|
void test_unsigned_64bit()
|
||||||
{
|
{
|
||||||
for (uint64_t i = 2; i < 25000; ++i) {
|
for (uint64_t i = 1; i < 25000; ++i) {
|
||||||
const Eigen::internal::TensorIntDivisor<uint64_t> div(i);
|
const Eigen::internal::TensorIntDivisor<uint64_t> div(i);
|
||||||
|
|
||||||
for (uint64_t j = 0; j < 25000; ++j) {
|
for (uint64_t j = 0; j < 25000; ++j) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user