From 2b457f8e5ec2bd38ce5049bd9220d3f37c7fa1ff Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 4 Apr 2016 11:47:46 +0200 Subject: [PATCH 1/5] Fix cross-compiling windows version detection --- cmake/EigenDetermineOSVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/EigenDetermineOSVersion.cmake b/cmake/EigenDetermineOSVersion.cmake index 3c48d4c37..9246fa67c 100644 --- a/cmake/EigenDetermineOSVersion.cmake +++ b/cmake/EigenDetermineOSVersion.cmake @@ -26,7 +26,7 @@ function(DetermineShortWindowsName WIN_VERSION win_num_version) endfunction() function(DetermineOSVersion OS_VERSION) - if (WIN32) + if (WIN32 AND CMAKE_HOST_SYSTEM_NAME MATCHES Windows) file (TO_NATIVE_PATH "$ENV{COMSPEC}" SHELL) exec_program( ${SHELL} ARGS "/c" "ver" OUTPUT_VARIABLE ver_output) From 1108b4f21836d52b50e4ec10a6e0eec027eda04d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 4 Apr 2016 11:09:25 -0700 Subject: [PATCH 2/5] Fixed the signature of numext::abs to make it compatible with complex numbers --- Eigen/src/Core/MathFunctions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 000cafee7..e6c7dfa08 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -1034,7 +1034,7 @@ double tan(const double &x) { return ::tan(x); } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -T abs(const T &x) { +typename NumTraits::Real abs(const T &x) { EIGEN_USING_STD_MATH(abs); return abs(x); } From c4179dd470f72520b9ffba5b78d4dd1261ccc609 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 4 Apr 2016 11:11:51 -0700 Subject: [PATCH 3/5] Updated the scalar_abs_op struct to make it compatible with cuda devices. --- Eigen/src/Core/functors/UnaryFunctors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 531beead6..46622f804 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -41,7 +41,7 @@ struct functor_traits > template struct scalar_abs_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) typedef typename NumTraits::Real result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pabs(a); } From 03f2997a119578b894ee717aff23e2641ab78f37 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 4 Apr 2016 16:41:47 -0400 Subject: [PATCH 4/5] bug #1191 - Prevent Clang/ARM from rewriting VMLA into VMUL+VADD --- Eigen/src/Core/arch/NEON/PacketMath.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index fead02916..10ef1d2b3 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -186,7 +186,25 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, co // MLA: 10 GFlop/s ; FMA: 12 GFlops/s. template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); } #else -template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { +#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM + // Clang/ARM will replace VMLA by VMUL+VADD at least for some values of -mcpu, + // at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the default on + // -march=armv7-a, that is a very common case. + // See e.g. this thread: + // http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html + Packet4f r = c; + asm volatile( + "vmla.f32 %q[r], %q[a], %q[b]" + : [r] "+w" (r) + : [a] "w" (a), + [b] "w" (b) + : ); + return r; +#else + return vmlaq_f32(c,a,b); +#endif +} #endif // No FMA instruction for int, so use MLA unconditionally. From 158fea0f5e15e4611c36ce73f582c484deeace1a Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 4 Apr 2016 16:42:40 -0400 Subject: [PATCH 5/5] bug #1190 - Don't trust __ARM_FEATURE_FMA on Clang/ARM --- Eigen/src/Core/arch/NEON/PacketMath.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 10ef1d2b3..63a2d9f52 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -177,7 +177,9 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, co return pset1(0); } -#ifdef __ARM_FEATURE_FMA +// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available, +// then implements a slow software scalar fallback calling fmaf()! +#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM) // See bug 936. // FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4. // FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.