mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-16 13:45:58 +08:00
Misc fixes for fp16
This commit is contained in:
parent
56a1757d74
commit
518149e868
@@ -551,14 +551,14 @@ struct hash<Eigen::half> {
 
 
 // Add the missing shfl_xor intrinsic
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
   return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
 }
 #endif
 
 // ldg() has an overload for __half, but we also need one for Eigen::half.
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
 static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
   return Eigen::internal::raw_uint16_to_half(
       __ldg(reinterpret_cast<const unsigned short*>(ptr)));
Loading…
x
Reference in New Issue
Block a user