From 2bf8fe1489102bad4be90ac3c8397a63b8dd398f Mon Sep 17 00:00:00 2001
From: Tobias Wood
Date: Thu, 22 Aug 2024 22:46:16 +0000
Subject: [PATCH] NEON Complex Intrinsics

---
Reviewer notes (commentary placed after the "---" separator, ahead of the
diffstat; it is not part of any hunk):

* pzero: new specializations for Packet1cf, Packet2cf and Packet1cd
  (Complex.h) and for Packet2f, Packet4f and Packet2d (PacketMath.h). Each
  one ignores its argument and returns a vector filled with 0 via
  vdup_n_* / vdupq_n_*.
* pmadd: only compiled when __ARM_FEATURE_COMPLEX is defined. Each
  specialization seeds the accumulator with c and chains the plain vcmla*
  intrinsic with its *_rot90 counterpart on the same (a, b) operands.
  NOTE(review): per the ACLE description of FCMLA the rot0 + rot90 pair is
  expected to yield c + a * b -- confirm against the intrinsics reference.
* pmul: when __ARM_FEATURE_COMPLEX is defined it becomes
  pmadd(a, b, pzero(a)); otherwise the pre-existing implementation is kept
  unchanged inside the new #else / #endif.
* This copy of the patch had lost its line breaks and the contents of most
  angle brackets. Line structure was rebuilt from the hunk counts, and
  template arguments were restored on pre-existing (context) lines only.
  Added lines are token-for-token as found. The function text that follows
  the second "@@" of a hunk header is informational.
  NOTE(review): the author e-mail on the From: line could not be recovered.

 Eigen/src/Core/arch/NEON/Complex.h    | 61 +++++++++++++++++++++++++++
 Eigen/src/Core/arch/NEON/PacketMath.h | 15 +++++++
 2 files changed, 76 insertions(+)

diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 5257c03c8..a0467113c 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -108,6 +108,16 @@ EIGEN_STRONG_INLINE Packet2cf pcast(const Packet2f& a) {
   return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a))));
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet1cf pzero(const Packet1cf& /*a*/) {
+  return Packet1cf(vdup_n_f32(0.0f));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet2cf pzero(const Packet2cf& /*a*/) {
+  return Packet2cf(vdupq_n_f32(0.0f));
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from) {
   return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from)));
@@ -156,6 +166,20 @@ EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) {
   return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
 }
 
+#ifdef __ARM_FEATURE_COMPLEX
+template <>
+EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& a, const Packet1cf& b, const Packet1cf& c) {
+  Packet1cf result;
+  result.v = vcmla_f32(c.v, a.v, b.v);
+  result.v = vcmla_rot90_f32(result.v, a.v, b.v);
+  return result;
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet1cf pmul(const Packet1cf& a, const Packet1cf& b) {
+  return pmadd(a, b, pzero(a));
+}
+#else
 template <>
 EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b) {
   Packet2f v1, v2;
@@ -175,6 +199,22 @@ EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1c
   // Add and return the result
   return Packet1cf(vadd_f32(v1, v2));
 }
+#endif
+
+#ifdef __ARM_FEATURE_COMPLEX
+template <>
+EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
+  Packet2cf result;
+  result.v = vcmlaq_f32(c.v, a.v, b.v);
+  result.v = vcmlaq_rot90_f32(result.v, a.v, b.v);
+  return result;
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) {
+  return pmadd(a, b, pzero(a));
+}
+#else
 template <>
 EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
   Packet4f v1, v2;
@@ -194,6 +234,7 @@ EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2c
   // Add and return the result
   return Packet2cf(vaddq_f32(v1, v2));
 }
+#endif
 
 template <>
 EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b) {
@@ -523,6 +564,11 @@ EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from
   EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>(reinterpret_cast<const double*>(from)));
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet1cd pzero(const Packet1cd& /*a*/) {
+  return Packet1cd(vdupq_n_f64(0.0));
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from) {
   /* here we really have to use unaligned loads :( */
@@ -549,6 +595,20 @@ EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
   return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR)));
 }
 
+#ifdef __ARM_FEATURE_COMPLEX
+template <>
+EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
+  Packet1cd result;
+  result.v = vcmlaq_f64(c.v, a.v, b.v);
+  result.v = vcmlaq_rot90_f64(result.v, a.v, b.v);
+  return result;
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) {
+  return pmadd(a, b, pzero(a));
+}
+#else
 template <>
 EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
   Packet2d v1, v2;
@@ -568,6 +628,7 @@ EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1c
   // Add and return the result
   return Packet1cd(vaddq_f64(v1, v2));
 }
+#endif
 
 template <>
 EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 56e8b2d51..2f59eeb3d 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -654,6 +654,16 @@ struct unpacket_traits {
   };
 };
 
+template <>
+EIGEN_STRONG_INLINE Packet2f pzero(const Packet2f& /*a*/) {
+  return vdup_n_f32(0.0f);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4f pzero(const Packet4f& /*a*/) {
+  return vdupq_n_f32(0.0f);
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet2f pset1<Packet2f>(const float& from) {
   return vdup_n_f32(from);
@@ -5147,6 +5157,11 @@ struct unpacket_traits {
   };
 };
 
+template <>
+EIGEN_STRONG_INLINE Packet2d pzero(const Packet2d& /*a*/) {
+  return vdupq_n_f64(0.0);
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
   return vdupq_n_f64(from);