diff --git a/Eigen/Core b/Eigen/Core index 5aeb4a625..98848dd19 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -14,6 +14,22 @@ // first thing Eigen does: stop the compiler from committing suicide #include "src/Core/util/DisableStupidWarnings.h" +#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA) + #define EIGEN_CUDACC __CUDACC__ +#endif + +#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA) + #define EIGEN_CUDA_ARCH __CUDA_ARCH__ +#endif + +#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9) +#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100)) +#elif defined(__CUDACC_VER__) +#define EIGEN_CUDACC_VER __CUDACC_VER__ +#else +#define EIGEN_CUDACC_VER 0 +#endif + // Handle NVCC/CUDA/SYCL #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) // Do not try asserts on CUDA and SYCL! @@ -229,7 +245,7 @@ #if defined __CUDACC__ #define EIGEN_VECTORIZE_CUDA #include - #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 + #if EIGEN_CUDACC_VER >= 70500 #define EIGEN_HAS_CUDA_FP16 #endif #endif diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 294c517ea..b7c2583f7 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -386,11 +386,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { - return half(::expf(float(a))); +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 + return half(hexp(a)); +#else + return half(::expf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 - return Eigen::half(::hlog(a)); +#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 + return half(::hlog(a)); #else return 
half(::logf(float(a))); #endif @@ -402,7 +406,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { - return half(::sqrtf(float(a))); +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 + return half(hsqrt(a)); +#else + return half(::sqrtf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); @@ -420,10 +428,18 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 + return half(hfloor(a)); +#else return half(::floorf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 + return half(hceil(a)); +#else return half(::ceilf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { @@ -557,7 +573,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { return Eigen::half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return Eigen::half(::hlog(a)); #else return Eigen::half(::logf(float(a))); diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index ae54225f8..943e0b06d 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -275,7 +275,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { return 
__floats2half2_rn(r1, r2); } -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 template<> __device__ EIGEN_STRONG_INLINE half2 plog(const half2& a) { diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 38d6ddb9a..c5ce43981 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -399,7 +399,7 @@ // Does the compiler support variadic templates? #ifndef EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 @@ -413,7 +413,7 @@ #ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above -#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) +#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ @@ -812,7 +812,8 @@ namespace Eigen { // just an empty macro ! 
#define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0) + // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) diff --git a/test/cuda_basic.cu b/test/cuda_basic.cu index cb2e4167a..0ff13477d 100644 --- a/test/cuda_basic.cu +++ b/test/cuda_basic.cu @@ -20,7 +20,7 @@ #include #include -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cu b/unsupported/test/cxx11_tensor_argmax_cuda.cu index 653443dc5..0dfd6cfe1 100644 --- a/unsupported/test/cxx11_tensor_argmax_cuda.cu +++ b/unsupported/test/cxx11_tensor_argmax_cuda.cu @@ -12,7 +12,7 @@ #define EIGEN_TEST_FUNC cxx11_tensor_cuda #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu index 88c233994..83a740e7a 100644 --- a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu @@ -13,7 +13,7 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_complex_cuda.cu b/unsupported/test/cxx11_tensor_complex_cuda.cu index d4e111f5d..509c15c6e 100644 --- a/unsupported/test/cxx11_tensor_complex_cuda.cu +++ 
b/unsupported/test/cxx11_tensor_complex_cuda.cu @@ -11,7 +11,7 @@ #define EIGEN_TEST_FUNC cxx11_tensor_complex #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu index 2baf5eaad..9133fce5a 100644 --- a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu +++ b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu @@ -11,7 +11,7 @@ #define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu index dd68430ce..0b2f3f0f4 100644 --- a/unsupported/test/cxx11_tensor_contract_cuda.cu +++ b/unsupported/test/cxx11_tensor_contract_cuda.cu @@ -14,7 +14,7 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 0ba9d52e9..ad8c9662f 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -12,7 +12,7 @@ #define EIGEN_TEST_FUNC cxx11_tensor_cuda #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu index fde20ddf2..ae21f492a 100644 --- a/unsupported/test/cxx11_tensor_device.cu +++ b/unsupported/test/cxx11_tensor_device.cu @@ -13,7 +13,7 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if 
EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 2f86980a2..463cc4bd6 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -13,7 +13,7 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_random_cuda.cu b/unsupported/test/cxx11_tensor_random_cuda.cu index b3be199e1..94d5f4e5a 100644 --- a/unsupported/test/cxx11_tensor_random_cuda.cu +++ b/unsupported/test/cxx11_tensor_random_cuda.cu @@ -13,7 +13,7 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cu b/unsupported/test/cxx11_tensor_reduction_cuda.cu index 6858b43a7..fd09d013b 100644 --- a/unsupported/test/cxx11_tensor_reduction_cuda.cu +++ b/unsupported/test/cxx11_tensor_reduction_cuda.cu @@ -12,7 +12,7 @@ #define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h" diff --git a/unsupported/test/cxx11_tensor_scan_cuda.cu b/unsupported/test/cxx11_tensor_scan_cuda.cu index 5f146f3c9..46571cfea 100644 --- a/unsupported/test/cxx11_tensor_scan_cuda.cu +++ b/unsupported/test/cxx11_tensor_scan_cuda.cu @@ -13,7 +13,7 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#if EIGEN_CUDACC_VER >= 70500 #include #endif #include "main.h"