mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
Properly gate the use of CUDA intrinsics in the code
This commit is contained in:
parent
97a1f1c273
commit
fcf59e1c37
@ -215,7 +215,7 @@ union FP32 {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static inline EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
|
static inline EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
|
||||||
#if defined(__CUDA_ARCH__) && defined(EIGEN_HAS_CUDA_FP16)
|
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||||
return __float2half(ff);
|
return __float2half(ff);
|
||||||
#else
|
#else
|
||||||
FP32 f; f.f = ff;
|
FP32 f; f.f = ff;
|
||||||
@ -263,7 +263,7 @@ static inline EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline EIGEN_DEVICE_FUNC float half_to_float(__half h) {
|
static inline EIGEN_DEVICE_FUNC float half_to_float(__half h) {
|
||||||
#if defined(__CUDA_ARCH__) && defined(EIGEN_HAS_CUDA_FP16)
|
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||||
return __half2float(h);
|
return __half2float(h);
|
||||||
#else
|
#else
|
||||||
const FP32 magic = { 113 << 23 };
|
const FP32 magic = { 113 << 23 };
|
||||||
@ -305,7 +305,7 @@ static inline EIGEN_DEVICE_FUNC bool (isinf)(const Eigen::half& a) {
|
|||||||
return (a.x & 0x7fff) == 0x7c00;
|
return (a.x & 0x7fff) == 0x7c00;
|
||||||
}
|
}
|
||||||
static inline EIGEN_HALF_CUDA_H bool (isnan)(const Eigen::half& a) {
|
static inline EIGEN_HALF_CUDA_H bool (isnan)(const Eigen::half& a) {
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
||||||
return __hisnan(x);
|
return __hisnan(x);
|
||||||
#else
|
#else
|
||||||
return (a.x & 0x7fff) > 0x7c00;
|
return (a.x & 0x7fff) > 0x7c00;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user