diff --git a/CMakeLists.txt b/CMakeLists.txt index 15dbbc4fa..6f027cb44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,7 +158,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) if(EIGEN_TEST_NEON) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard -mfpu=neon -mcpu=cortex-a8") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon -mcpu=cortex-a8") message("Enabling NEON in tests/examples") endif() diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index d1943ba3b..7d9e1fc7f 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -73,7 +73,8 @@ template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, con template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { - return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), p4ui_CONJ_XOR))); + Packet4ui b = vreinterpretq_u32_f32(a.v); + return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR))); } template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) @@ -116,8 +117,8 @@ template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); } -template<> EIGEN_STRONG_INLINE Packet2cf pload >(const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } -template<> EIGEN_STRONG_INLINE Packet2cf ploadu >(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf pload(const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } @@ -143,7 +144,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) return Packet2cf(a_r128); } -EIGEN_STRONG_INLINE Packet2cf pcplxflip/**/(const Packet2cf& x) +EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& a) { return Packet2cf(vrev64q_f32(a.v)); } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index cae35d737..c09a5a885 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -45,6 +45,7 @@ namespace internal { typedef float32x4_t Packet4f; typedef int32x4_t Packet4i; +typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ const Packet4f p4f_##NAME = pset1(X) @@ -176,22 +177,22 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, con } template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } +template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) +template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) { - float32x2_t lo, ho; + float32x2_t lo, hi; lo = vdup_n_f32(*from); hi = vdup_n_f32(*from); return vcombine_f32(lo, hi); } -template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const float* from) +template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) { - int32x2_t lo, ho; + int32x2_t lo, hi; lo = vdup_n_s32(*from); hi = vdup_n_s32(*from); return vcombine_s32(lo, hi);