From 51be91f15e745fbcc7b1b3584b2d0b947a500272 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 21 Dec 2015 08:42:58 -0800 Subject: [PATCH] Added support for CUDA architectures that don's support for 3.5 capabilities --- Eigen/src/Core/arch/CUDA/PacketMath.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index cb1b547e0..4495b3741 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -183,25 +183,39 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(double* to to[1] = from.y; } -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro(const float* from) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 return __ldg((const float4*)from); +#else + return make_float4(from[0], from[1], from[2], from[3]); +#endif } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro(const double* from) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 return __ldg((const double2*)from); +#else + return make_float2(from[0], from[1]); +#endif } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro(const float* from) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3)); +#else + return make_float4(from[0], from[1], from[2], from[3]); +#endif } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro(const double* from) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 return make_double2(__ldg(from+0), __ldg(from+1)); -} +#else + return make_float2(from[0], from[1]); #endif +} template<> EIGEN_DEVICE_FUNC inline float4 pgather(const float* from, Index stride) { return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);