mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-04 04:05:19 +08:00
Added proper support for fast 64bit integer division on CUDA
This commit is contained in:
parent
f37a5f1c53
commit
383d1cc2ed
@ -61,13 +61,8 @@ namespace {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct DividerTraits {
|
struct DividerTraits {
|
||||||
#if defined(__SIZEOF_INT128__) && !defined(__CUDACC__)
|
|
||||||
typedef typename UnsignedTraits<T>::type type;
|
typedef typename UnsignedTraits<T>::type type;
|
||||||
static const int N = sizeof(T) * 8;
|
static const int N = sizeof(T) * 8;
|
||||||
#else
|
|
||||||
typedef uint32_t type;
|
|
||||||
static const int N = 32;
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -79,40 +74,38 @@ namespace {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__)
|
|
||||||
template <typename T>
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
|
|
||||||
return __umul64hi(a, b);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
|
||||||
#if defined(__SIZEOF_INT128__) && !defined(__CUDACC__)
|
#if defined(__CUDA_ARCH__)
|
||||||
|
return __umul64hi(a, b);
|
||||||
|
#elif defined(__SIZEOF_INT128__)
|
||||||
__uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);
|
__uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);
|
||||||
return static_cast<uint64_t>(v >> 64);
|
return static_cast<uint64_t>(v >> 64);
|
||||||
#else
|
#else
|
||||||
EIGEN_STATIC_ASSERT(sizeof(T) == 4, YOU_MADE_A_PROGRAMMING_MISTAKE);
|
return (TensorUInt128<static_val<0>, uint64_t>(a) * TensorUInt128<static_val<0>, uint64_t>(b)).upper();
|
||||||
return (a * b) >> 32;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
template <int N, typename T>
|
template <int N, typename T>
|
||||||
struct DividerHelper {
|
struct DividerHelper {
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier (const int log_div, const T divider) {
|
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) {
|
||||||
EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE);
|
EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||||
return static_cast<uint32_t>((static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1);
|
return static_cast<uint32_t>((static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(__SIZEOF_INT128__) && !defined(__CUDACC__)
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct DividerHelper<64, T> {
|
struct DividerHelper<64, T> {
|
||||||
static EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
|
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
|
||||||
|
#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__)
|
||||||
return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
|
return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
|
||||||
|
#else
|
||||||
|
const uint64_t shift = 1ULL << log_div;
|
||||||
|
TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
|
||||||
|
return static_cast<uint64_t>(result);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user