mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 19:29:02 +08:00
Fix some CUDA warnings.
Added `EIGEN_HAS_STD_HASH` macro, checking for C++11 support and not running on GPU. `std::hash<float>` is not a device function, so cannot be used by `std::hash<bfloat16>`. Removed `EIGEN_DEVICE_FUNC` and only define if `EIGEN_HAS_STD_HASH`. Same for `half`. Added `EIGEN_CUDA_HAS_FP16_ARITHMETIC` to improve readability, eliminate warnings about `EIGEN_CUDA_ARCH` not being defined. Replaced a couple C-style casts with `reinterpret_cast` for aligned loading of `half*` to `half2*`. This eliminates `-Wcast-align` warnings in clang. Although not ideal due to potential type aliasing, this is how CUDA handles these conversions internally.
This commit is contained in:
parent
88d4c6d4c8
commit
db5691ff2b
@ -655,20 +655,6 @@ template<> struct NumTraits<Eigen::bfloat16>
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
namespace std {
|
||||
|
||||
#if __cplusplus > 199711L
|
||||
template <>
|
||||
struct hash<Eigen::bfloat16> {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {
|
||||
return hash<float>()(static_cast<float>(a));
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
} // namespace std
|
||||
|
||||
|
||||
namespace Eigen {
|
||||
namespace numext {
|
||||
|
||||
@ -703,4 +689,16 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat1
|
||||
} // namespace numext
|
||||
} // namespace Eigen
|
||||
|
||||
#if EIGEN_HAS_STD_HASH
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<Eigen::bfloat16> {
|
||||
EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {
|
||||
return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
#endif
|
||||
|
||||
|
||||
#endif // EIGEN_BFLOAT16_H
|
||||
|
@ -757,19 +757,6 @@ template<> struct NumTraits<Eigen::half>
|
||||
#pragma pop_macro("EIGEN_CONSTEXPR")
|
||||
#endif
|
||||
|
||||
namespace std {
|
||||
|
||||
#if __cplusplus > 199711L
|
||||
template <>
|
||||
struct hash<Eigen::half> {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
|
||||
return static_cast<std::size_t>(a.x);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
} // end namespace std
|
||||
|
||||
namespace Eigen {
|
||||
namespace numext {
|
||||
|
||||
@ -870,4 +857,15 @@ EIGEN_STRONG_INLINE __device__ Eigen::half __ldg(const Eigen::half* ptr) {
|
||||
}
|
||||
#endif // __ldg
|
||||
|
||||
#if EIGEN_HAS_STD_HASH
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<Eigen::half> {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
|
||||
return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
|
||||
}
|
||||
};
|
||||
} // end namespace std
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_HALF_H
|
||||
|
@ -15,12 +15,16 @@ namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
// Read-only data cached load available.
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 350
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350)
|
||||
#define EIGEN_GPU_HAS_LDG 1
|
||||
#endif
|
||||
|
||||
// FP16 math available.
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
|
||||
#if (defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530)
|
||||
#define EIGEN_CUDA_HAS_FP16_ARITHMETIC 1
|
||||
#endif
|
||||
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE) || defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
|
||||
#define EIGEN_GPU_HAS_FP16_ARITHMETIC 1
|
||||
#endif
|
||||
|
||||
@ -603,7 +607,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to,
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(
|
||||
const Eigen::half* from) {
|
||||
#if defined(EIGEN_GPU_HAS_LDG)
|
||||
return __ldg((const half2*)from);
|
||||
// Input is guaranteed to be properly aligned.
|
||||
return __ldg(reinterpret_cast<const half2*>(from));
|
||||
#else
|
||||
return combine_half(*(from+0), *(from+1));
|
||||
#endif
|
||||
@ -922,7 +927,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexpm1(const half2& a) {
|
||||
return __floats2half2_rn(r1, r2);
|
||||
}
|
||||
|
||||
#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \
|
||||
#if (EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)) || \
|
||||
defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@ -1033,7 +1038,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2
|
||||
ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
|
||||
#if defined(EIGEN_GPU_HAS_LDG)
|
||||
Packet4h2 r;
|
||||
r = __ldg((const Packet4h2*)from);
|
||||
r = __ldg(reinterpret_cast<const Packet4h2*>(from));
|
||||
return r;
|
||||
#else
|
||||
Packet4h2 r;
|
||||
@ -1226,7 +1231,7 @@ plset<Packet4h2>(const Eigen::half& a) {
|
||||
p_alias[3] = __halves2half2(__hadd(a, __float2half(6.0f)),
|
||||
__hadd(a, __float2half(7.0f)));
|
||||
return r;
|
||||
#elif EIGEN_CUDA_ARCH >= 530
|
||||
#elif defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
|
||||
Packet4h2 r;
|
||||
half2* r_alias = reinterpret_cast<half2*>(&r);
|
||||
|
||||
@ -1478,7 +1483,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<Packet4h2>(
|
||||
predux_max(a_alias[3]));
|
||||
__half first = predux_max(m0);
|
||||
__half second = predux_max(m1);
|
||||
#if EIGEN_CUDA_ARCH >= 530
|
||||
#if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
|
||||
return (__hgt(first, second) ? first : second);
|
||||
#else
|
||||
float ffirst = __half2float(first);
|
||||
@ -1497,7 +1502,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<Packet4h2>(
|
||||
predux_min(a_alias[3]));
|
||||
__half first = predux_min(m0);
|
||||
__half second = predux_min(m1);
|
||||
#if EIGEN_CUDA_ARCH >= 530
|
||||
#if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
|
||||
return (__hlt(first, second) ? first : second);
|
||||
#else
|
||||
float ffirst = __half2float(first);
|
||||
@ -1669,6 +1674,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a,
|
||||
#endif // defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)
|
||||
|
||||
#undef EIGEN_GPU_HAS_LDG
|
||||
#undef EIGEN_CUDA_HAS_FP16_ARITHMETIC
|
||||
#undef EIGEN_GPU_HAS_FP16_ARITHMETIC
|
||||
|
||||
} // end namespace internal
|
||||
|
@ -668,6 +668,17 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Does the compiler support std::hash?
|
||||
#ifndef EIGEN_HAS_STD_HASH
|
||||
// The std::hash struct is defined in C++11 but is not labelled as a __device__
|
||||
// function and is not constexpr, so cannot be used on device.
|
||||
#if EIGEN_HAS_CXX11 && !EIGEN_GPU_COMPILE_PHASE
|
||||
#define EIGEN_HAS_STD_HASH 1
|
||||
#else
|
||||
#define EIGEN_HAS_STD_HASH 0
|
||||
#endif
|
||||
#endif // EIGEN_HAS_STD_HASH
|
||||
|
||||
#ifndef EIGEN_HAS_ALIGNAS
|
||||
#if EIGEN_MAX_CPP_VER>=11 && EIGEN_HAS_CXX11 && \
|
||||
( __has_feature(cxx_alignas) \
|
||||
|
Loading…
x
Reference in New Issue
Block a user