From 513e357b4806fba7d3420ab041d6d4a58fea1939 Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Fri, 17 Jul 2015 15:35:16 -0700
Subject: [PATCH] Added support for prefetching on cuda devices

---
 Eigen/src/Core/GenericPacketMath.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 8a7a0eddc..cbb15f79d 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -281,7 +281,15 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
 /** \internal tries to do cache prefetching of \a addr */
 template<typename Scalar> inline void prefetch(const Scalar* addr)
 {
-#if !EIGEN_COMP_MSVC
+#ifdef __CUDA_ARCH__
+#if defined(__LP64__)
+  // 64-bit pointer operand constraint for inlined asm
+  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
+#else
+  // 32-bit pointer operand constraint for inlined asm
+  asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
+#endif
+#elif !EIGEN_COMP_MSVC
   __builtin_prefetch(addr);
 #endif
 }