From 9b6d624eabc0207a2ec6954c5c60657758c7617d Mon Sep 17 00:00:00 2001 From: Charles Schlosser Date: Tue, 8 Nov 2022 20:03:01 +0000 Subject: [PATCH] Fix NEON psignbit: define the Packet2d, Packet4hf and Packet8hf overloads beside their pabs counterparts --- Eigen/src/Core/arch/NEON/PacketMath.h | 28 ++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index a024c1cd0..08b2c211d 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -2372,11 +2372,14 @@ template<> EIGEN_STRONG_INLINE Packet2l pabs(const Packet2l& a) { } template<> EIGEN_STRONG_INLINE Packet2ul pabs(const Packet2ul& a) { return a; } -template<> EIGEN_STRONG_INLINE Packet4h psignbit(const Packet4h& a) { return vreinterpret_f16_s16( vshr_n_s16( vreinterpret_s16_f16(a), 15)); } -template<> EIGEN_STRONG_INLINE Packet8h psignbit(const Packet8h& a) { return vreinterpretq_f16_s16(vshrq_n_s16(vreinterpretq_s16_f16(a), 15)); } -template<> EIGEN_STRONG_INLINE Packet2f psignbit(const Packet2f& a) { return vreinterpret_f32_s32( vshr_n_s32( vreinterpret_s32_f32(a), 31)); } -template<> EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) { return vreinterpretq_f32_s32(vshrq_n_s32(vreinterpretq_s32_f32(a), 31)); } -template<> EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) { return vreinterpretq_f64_s64(vshrq_n_s64(vreinterpretq_s64_f64(a), 63)); } +template <> +EIGEN_STRONG_INLINE Packet2f psignbit(const Packet2f& a) { + return vreinterpret_f32_s32(vshr_n_s32(vreinterpret_s32_f32(a), 31)); +} +template <> +EIGEN_STRONG_INLINE Packet4f psignbit(const Packet4f& a) { + return vreinterpretq_f32_s32(vshrq_n_s32(vreinterpretq_s32_f32(a), 31)); +} template<> EIGEN_STRONG_INLINE Packet2f pfrexp(const Packet2f& a, Packet2f& exponent) { return pfrexp_generic(a,exponent); } @@ -3910,6 +3913,11 @@ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); } 
+template <> +EIGEN_STRONG_INLINE Packet2d psignbit(const Packet2d& a) { + return vreinterpretq_f64_s64(vshrq_n_s64(vreinterpretq_s64_f64(a), 63)); /* sign bit -> all-ones lane mask */ +} + template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { return vaddvq_f64(a); } @@ -4471,11 +4479,21 @@ EIGEN_STRONG_INLINE Packet8hf pabs(const Packet8hf& a) { return vabsq_f16(a); } +template <> +EIGEN_STRONG_INLINE Packet8hf psignbit(const Packet8hf& a) { + return vreinterpretq_f16_s16(vshrq_n_s16(vreinterpretq_s16_f16(a), 15)); /* sign bit -> all-ones lane mask */ +} + template <> EIGEN_STRONG_INLINE Packet4hf pabs(const Packet4hf& a) { return vabs_f16(a); } +template <> +EIGEN_STRONG_INLINE Packet4hf psignbit(const Packet4hf& a) { + return vreinterpret_f16_s16(vshr_n_s16(vreinterpret_s16_f16(a), 15)); /* sign bit -> all-ones lane mask */ +} + template <> EIGEN_STRONG_INLINE Eigen::half predux(const Packet8hf& a) { float16x4_t a_lo, a_hi, sum;