From 2dbfd83424cd0d30dac3b42b27b970b44a4e4541 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 25 Apr 2014 02:46:57 -0700 Subject: [PATCH] Implement pbroadcast4 on altivec --- Eigen/src/Core/arch/AltiVec/Complex.h | 2 +- Eigen/src/Core/arch/AltiVec/PacketMath.h | 26 ++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 8fdffad5e..5409ddedd 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -229,7 +229,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV)); } -template<> EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) +EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0); kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1); diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 8a67354e4..0e9adf450 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -168,6 +168,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return vc; } + +template<> EIGEN_STRONG_INLINE void +pbroadcast4(const float *a, + Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3) +{ + a3 = vec_ld(0,a); + a0 = vec_splat(a3, 0); + a1 = vec_splat(a3, 1); + a2 = vec_splat(a3, 2); + a3 = vec_splat(a3, 3); +} +template<> EIGEN_STRONG_INLINE void +pbroadcast4(const int *a, + Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3) +{ + a3 = vec_ld(0,a); + a0 = vec_splat(a3, 0); + a1 = vec_splat(a3, 1); + a2 = vec_splat(a3, 2); + a3 = vec_splat(a3, 3); +} + template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, int stride) { float EIGEN_ALIGN16 af[4]; @@ -538,7 +560,7 @@ struct palign_impl } }; -template<> EIGEN_DEVICE_FUNC inline void +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { Packet4f t0, t1, t2, t3; t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]); @@ -551,7 +573,7 @@ ptranspose(PacketBlock& kernel) { kernel.packet[3] = vec_mergel(t1, t3); } -template<> EIGEN_DEVICE_FUNC inline void +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { Packet4i t0, t1, t2, t3; t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);