From 5daebe0a277dc5a1034fb355469ef28fb139b2c4 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 1 Sep 2014 10:58:07 +0200 Subject: [PATCH] bug #871: fix compilation on ARM/Neon regarding __has_builtin usage (backport) --- Eigen/src/Core/arch/NEON/PacketMath.h | 15 ++++++++++++--- Eigen/src/Core/util/Macros.h | 7 +++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 163bac215..74bd859a4 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -48,9 +48,18 @@ typedef uint32x4_t Packet4ui; #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y} #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W} #endif - -#ifndef __pld -#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" ); + +// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function +// which available on LLVM and GCC (at least) +#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__) + #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); +#elif defined __pld + #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) +#elif !defined(__aarch64__) + #define EIGEN_ARM_PREFETCH(ADDR) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); +#else + // by default no explicit prefetching + #define EIGEN_ARM_PREFETCH(ADDR) #endif template<> struct packet_traits : default_packet_traits diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 3a010ec6a..17b29bec1 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -96,6 +96,13 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t #endif +// Cross compiler wrapper around LLVM's __has_builtin +#ifdef __has_builtin +# define EIGEN_HAS_BUILTIN(x) __has_builtin(x) +#else +# define EIGEN_HAS_BUILTIN(x) 0 +#endif + /** Allows to disable some optimizations which might affect the accuracy of the result. * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. * They currently include: