diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index dd1234b9f..e57d9c91c 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -57,16 +57,6 @@ typedef eigen_packet_wrapper Packet4ui; typedef eigen_packet_wrapper Packet2l; typedef eigen_packet_wrapper Packet2ul; -EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) { - float from[4] = {a, b, c, d}; - return vld1q_f32(from); -} - -EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) { - float from[2] = {a, b}; - return vld1_f32(from); -} - #else typedef float32x2_t Packet2f; @@ -88,11 +78,18 @@ typedef uint32x4_t Packet4ui; typedef int64x2_t Packet2l; typedef uint64x2_t Packet2ul; -EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) { return {a, b, c, d}; } -EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) { return {a, b}; } - #endif // EIGEN_COMP_MSVC_STRICT +EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) { + float from[4] = {a, b, c, d}; + return vld1q_f32(from); +} + +EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) { + float from[2] = {a, b}; + return vld1_f32(from); +} + EIGEN_STRONG_INLINE Packet4f shuffle1(const Packet4f& m, int mask){ const float* a = reinterpret_cast(&m); Packet4f res = make_packet4f(*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3 )), *(a + ((mask >> 6) & 3))); @@ -3665,20 +3662,16 @@ template float64x2_t vreinterpretq_f64_u64(T a) { return (float64x2 #if EIGEN_COMP_MSVC_STRICT typedef eigen_packet_wrapper Packet2d; typedef eigen_packet_wrapper Packet1d; +#else +typedef float64x2_t Packet2d; +typedef float64x1_t Packet1d; +#endif EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b) { double from[2] = {a, b}; return vld1q_f64(from); } -#else -typedef float64x2_t Packet2d; -typedef float64x1_t Packet1d; - -EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b) { return {a, b}; } -#endif - - // fuctionally equivalent to _mm_shuffle_pd in SSE (i.e. shuffle(m, n, mask) equals _mm_shuffle_pd(m,n,mask)) // Currently used in LU/arch/InverseSize4.h to enable a shared implementation // for fast inversion of matrices of size 4. diff --git a/Eigen/src/Core/arch/NEON/TypeCasting.h b/Eigen/src/Core/arch/NEON/TypeCasting.h index 1bc51b0b1..c546466a1 100644 --- a/Eigen/src/Core/arch/NEON/TypeCasting.h +++ b/Eigen/src/Core/arch/NEON/TypeCasting.h @@ -41,7 +41,7 @@ EIGEN_STRONG_INLINE Packet4c preinterpret(const Packet4uc& } template <> EIGEN_STRONG_INLINE Packet8c preinterpret(const Packet8uc& a) { - return Packet8c(preinterpret(a)); + return Packet8c(vreinterpret_s8_u8(a)); } template <> EIGEN_STRONG_INLINE Packet16c preinterpret(const Packet16uc& a) { @@ -1373,7 +1373,7 @@ struct type_casting_traits { template <> EIGEN_STRONG_INLINE Packet2d pcast(const Packet8us& a) { // Discard all but first two values. - Packet2f tmp = pcast(vget_low_s16(a)); + Packet2f tmp = pcast(vget_low_u16(a)); return vcvt_f64_f32(tmp); }