diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index b7b5e7d22..e065fa714 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -153,7 +153,9 @@ template struct NumTraits > MulCost = 4 * NumTraits::MulCost + 2 * NumTraits::AddCost }; + EIGEN_DEVICE_FUNC static inline Real epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return NumTraits::dummy_precision(); } }; @@ -166,7 +168,7 @@ struct NumTraits > typedef typename NumTraits::NonInteger NonIntegerScalar; typedef Array NonInteger; typedef ArrayType & Nested; - + enum { IsComplex = NumTraits::IsComplex, IsInteger = NumTraits::IsInteger, @@ -176,8 +178,10 @@ struct NumTraits > AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::AddCost, MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::MulCost }; - + + EIGEN_DEVICE_FUNC static inline RealScalar epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC static inline RealScalar dummy_precision() { return NumTraits::dummy_precision(); } }; diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 6c412159c..212aa0d5d 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -341,6 +341,18 @@ template<> struct is_arithmetic { enum { value = true }; }; } // end namespace internal +template<> struct NumTraits + : GenericNumTraits +{ + EIGEN_DEVICE_FUNC static inline float dummy_precision() { return 1e-3f; } + EIGEN_DEVICE_FUNC static inline Eigen::half highest() { + return internal::raw_uint16_to_half(0x7bff); + } + EIGEN_DEVICE_FUNC static inline Eigen::half lowest() { + return internal::raw_uint16_to_half(0xfbff); + } +}; + // Infinity/NaN checks. namespace numext { @@ -348,7 +360,7 @@ namespace numext { static inline EIGEN_DEVICE_FUNC bool (isinf)(const Eigen::half& a) { return (a.x & 0x7fff) == 0x7c00; } -static inline EIGEN_HALF_CUDA_H bool (isnan)(const Eigen::half& a) { +static inline EIGEN_DEVICE_FUNC bool (isnan)(const Eigen::half& a) { #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hisnan(a); #else @@ -403,6 +415,15 @@ using ::sqrt; using ::floor; using ::ceil; +#if __cplusplus > 199711L +template <> +struct hash { + size_t operator()(const Eigen::half& a) const { + return std::hash()(a.x); + } +}; +#endif + } // end namespace std @@ -411,7 +432,14 @@ using ::ceil; __device__ inline Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) { return static_cast(__shfl_xor(static_cast(var), laneMask, width)); } +#endif +// ldg() has an overload for __half, but we also need one for Eigen::half. +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320 +static inline EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) { + return Eigen::internal::raw_uint16_to_half( + __ldg(reinterpret_cast(ptr))); +} #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index 7086a426d..d6db45ade 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -10,8 +10,9 @@ #ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H #define EIGEN_CXX11_TENSOR_TENSOR_FFT_H -// NVCC fails to compile this code -#if !defined(__CUDACC__) +// This code requires the ability to initialize arrays of constant +// values directly inside a class. +#if __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900 namespace Eigen { @@ -564,7 +565,7 @@ struct TensorEvaluator, D // This will support a maximum FFT size of 2^32 for each dimension // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2; - RealScalar m_sin_PI_div_n_LUT[32] = { + const RealScalar m_sin_PI_div_n_LUT[32] = { RealScalar(0.0), RealScalar(-2), RealScalar(-0.999999999999999), @@ -600,7 +601,7 @@ struct TensorEvaluator, D }; // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i)); - RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { + const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { RealScalar(0.0), RealScalar(0.0), RealScalar(-1.00000000000000e+00), @@ -638,7 +639,7 @@ struct TensorEvaluator, D } // end namespace Eigen -#endif // __CUDACC__ +#endif // EIGEN_HAS_CONSTEXPR #endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H