Altivec fixes for Darwin: do not use unsupported VSX insns

Author: Sergey Fedorov, 2023-01-12 16:33:33 +00:00 (committed by Antonio Sánchez)
Parent: 6156797016
Commit: 4d05765345
6 changed files with 43 additions and 38 deletions

File 1 of 6

@@ -18,7 +18,7 @@ namespace Eigen {
 namespace internal {
 static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 #if defined(_BIG_ENDIAN)
 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
@@ -103,7 +103,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
 HasMin = 0,
 HasMax = 0,
 HasSqrt = 1,
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 HasBlend = 1,
 #endif
 HasSetLinear = 0
@@ -115,7 +115,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
 {
 Packet2cf res;
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 // Load a single std::complex<float> from memory and duplicate
 //
 // Using pload would read past the end of the reference in this case
@@ -151,7 +151,7 @@ template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<std::complex<float> >(std::c
 EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>& from0, const std::complex<float>& from1)
 {
 Packet4f res0, res1;
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 // Load two std::complex<float> from memory and combine
 __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (from0));
 __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (from1));
@@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x
 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
 {
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 Packet4f tmp = reinterpret_cast<Packet4f>(vec_mergeh(reinterpret_cast<Packet2d>(kernel.packet[0].v), reinterpret_cast<Packet2d>(kernel.packet[1].v)));
 kernel.packet[1].v = reinterpret_cast<Packet4f>(vec_mergel(reinterpret_cast<Packet2d>(kernel.packet[0].v), reinterpret_cast<Packet2d>(kernel.packet[1].v)));
 #else
@@ -284,7 +284,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packe
 return Packet2cf(vec_and(eq, vec_perm(eq, eq, p16uc_COMPLEX32_REV)));
 }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
 Packet2cf result;
 result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
@@ -298,7 +298,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a)
 }
 //---------- double ----------
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 struct Packet1cd
 {
 EIGEN_STRONG_INLINE Packet1cd() {}
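
The pattern throughout this file is uniform: every path that emits VSX-only code, such as the lxsdx loads in pload2 or the 64-bit-element merges in ptranspose, is now gated on Eigen's own EIGEN_VECTORIZE_VSX rather than the raw compiler macro __VSX__, so Darwin builds fall back to plain AltiVec. A minimal standalone sketch of the idea follows; the helper name is hypothetical and it assumes EIGEN_VECTORIZE_VSX is defined only when the target can actually execute VSX instructions, which is what the Core change further down establishes.

#include <altivec.h>
#include <complex>

// Broadcast one std::complex<float> into both halves of a 4 x float vector.
static inline __vector float broadcast_complex(const std::complex<float>* from) {
#ifdef EIGEN_VECTORIZE_VSX
  __vector float v;
  // VSX: load the 8-byte complex and splat the doubleword (lxvdsx is the
  // load-and-splat counterpart of the lxsdx used above; both are VSX-only).
  __asm__("lxvdsx %x0,%y1" : "=wa"(v)
          : "Z"(*reinterpret_cast<const double*>(from)));
  return v;
#else
  // AltiVec only (the Darwin/G4/G5 case): stage through an aligned buffer and vec_ld.
  float buf[4] __attribute__((aligned(16))) = {from->real(), from->imag(),
                                               from->real(), from->imag()};
  return vec_ld(0, buf);
#endif
}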

File 2 of 6

@@ -60,7 +60,7 @@ Packet4f patan<Packet4f>(const Packet4f& _x)
 return patan_float(_x);
 }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 #ifndef EIGEN_COMP_CLANG
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet4f prsqrt<Packet4f>(const Packet4f& x)

File 3 of 6

@@ -599,7 +599,7 @@ EIGEN_ALWAYS_INLINE Packet2cf pcplxflip2(Packet2cf a)
 EIGEN_ALWAYS_INLINE Packet1cd pcplxflip2(Packet1cd a)
 {
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 return Packet1cd(__builtin_vsx_xxpermdi(a.v, a.v, 2));
 #else
 return Packet1cd(Packet2d(vec_perm(Packet16uc(a.v), Packet16uc(a.v), p16uc_COMPLEX64_XORFLIP)));
@@ -610,7 +610,7 @@ EIGEN_ALWAYS_INLINE Packet1cd pcplxflip2(Packet1cd a)
 EIGEN_ALWAYS_INLINE Packet4f pload_complex_half(std::complex<float>* src)
 {
 Packet4f t;
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 // Load float64/two float32 (doubleword alignment)
 __asm__("lxsdx %x0,%y1" : "=wa" (t) : "Z" (*src));
 #else
@@ -636,7 +636,7 @@ EIGEN_ALWAYS_INLINE void pload_realimag(RhsScalar* src, Packet4f& r, Packet4f& i
 template<typename RhsScalar>
 EIGEN_ALWAYS_INLINE void pload_realimag(RhsScalar* src, Packet2d& r, Packet2d& i)
 {
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 __asm__("lxvdsx %x0,%y1" : "=wa" (r) : "Z" (*(reinterpret_cast<double*>(src) + 0)));
 __asm__("lxvdsx %x0,%y1" : "=wa" (i) : "Z" (*(reinterpret_cast<double*>(src) + 1)));
 #else
@@ -675,7 +675,7 @@ EIGEN_ALWAYS_INLINE void pload_realimag_row(RhsScalar* src, Packet2d& r, Packet2
 /** \internal load and splat a complex value into a vector - column-wise */
 EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine(std::complex<float>* src)
 {
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 Packet4f ret;
 __asm__("lxvdsx %x0,%y1" : "=wa" (ret) : "Z" (*(reinterpret_cast<double*>(src) + 0)));
 return ret;

File 4 of 6

@@ -173,7 +173,7 @@ struct packet_traits<float> : default_packet_traits {
 HasATan = 1,
 HasLog = 1,
 HasExp = 1,
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 HasSqrt = 1,
 #if !EIGEN_COMP_CLANG
 HasRsqrt = 1,
@@ -218,7 +218,7 @@ struct packet_traits<bfloat16> : default_packet_traits {
 HasCos = EIGEN_FAST_MATH,
 HasLog = 1,
 HasExp = 1,
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 HasSqrt = 1,
 #if !EIGEN_COMP_CLANG
 HasRsqrt = 1,
@@ -446,7 +446,7 @@ EIGEN_STRONG_INLINE Packet pload_common(const __UNPACK_TYPE__(Packet)* from)
 // ignoring these warnings for now.
 EIGEN_UNUSED_VARIABLE(from);
 EIGEN_DEBUG_ALIGNED_LOAD
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));
 #else
 return vec_ld(0, from);
@@ -501,7 +501,7 @@ EIGEN_ALWAYS_INLINE Packet pload_ignore(const __UNPACK_TYPE__(Packet)* from)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));
 #else
 return vec_ld(0, from);
@@ -608,7 +608,7 @@ EIGEN_STRONG_INLINE void pstore_common(__UNPACK_TYPE__(Packet)* to, const Packet
 // ignoring these warnings for now.
 EIGEN_UNUSED_VARIABLE(to);
 EIGEN_DEBUG_ALIGNED_STORE
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 vec_xst(from, 0, to);
 #else
 vec_st(from, 0, to);
@@ -1054,7 +1054,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i&
 template<> EIGEN_STRONG_INLINE Packet8s pmadd(const Packet8s& a, const Packet8s& b, const Packet8s& c) { return vec_madd(a,b,c); }
 template<> EIGEN_STRONG_INLINE Packet8us pmadd(const Packet8us& a, const Packet8us& b, const Packet8us& c) { return vec_madd(a,b,c); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_msub(a,b,c); }
 template<> EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_nmsub(a,b,c); }
 template<> EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_nmadd(a,b,c); }
@@ -1062,7 +1062,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f& a, const Packet4f
 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
 {
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 // NOTE: about 10% slower than vec_min, but consistent with std::min and SSE regarding NaN
 Packet4f ret;
 __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
@@ -1080,7 +1080,7 @@ template<> EIGEN_STRONG_INLINE Packet16uc pmin<Packet16uc>(const Packet16uc& a,
 template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
 {
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 // NOTE: about 10% slower than vec_max, but consistent with std::max and SSE regarding NaN
 Packet4f ret;
 __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
@@ -1103,27 +1103,27 @@ template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const
 return vec_nor(c,c);
 }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmple(a,b)); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmplt(a,b)); }
 template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmpeq(a,b)); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet8s pcmp_le(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmple(a,b)); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet8s pcmp_lt(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmplt(a,b)); }
 template<> EIGEN_STRONG_INLINE Packet8s pcmp_eq(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmpeq(a,b)); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet8us pcmp_le(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmple(a,b)); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet8us pcmp_lt(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmplt(a,b)); }
 template<> EIGEN_STRONG_INLINE Packet8us pcmp_eq(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmpeq(a,b)); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet16c pcmp_le(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmple(a,b)); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet16c pcmp_lt(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmplt(a,b)); }
 template<> EIGEN_STRONG_INLINE Packet16c pcmp_eq(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmpeq(a,b)); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet16uc pcmp_le(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmple(a,b)); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet16uc pcmp_lt(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmplt(a,b)); }
@@ -1164,7 +1164,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
 Packet4f t = vec_add(reinterpret_cast<Packet4f>(vec_or(vec_and(reinterpret_cast<Packet4ui>(a), p4ui_SIGN), p4ui_PREV0DOT5)), a);
 Packet4f res;
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 __asm__("xvrspiz %x0, %x1\n\t"
 : "=&wa" (res)
 : "wa" (t));
@@ -1178,7 +1178,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
 }
 template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
 template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a)
 {
 Packet4f res;
@@ -1194,7 +1194,7 @@ template<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a)
 template<typename Packet> EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet)* from)
 {
 EIGEN_DEBUG_ALIGNED_LOAD
-#if defined(__VSX__) || !defined(_BIG_ENDIAN)
+#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN)
 EIGEN_DEBUG_UNALIGNED_LOAD
 return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));
 #else
@@ -1377,7 +1377,7 @@ template<> EIGEN_STRONG_INLINE Packet16uc ploaddup<Packet16uc>(const unsigned ch
 template<typename Packet> EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet)* to, const Packet& from)
 {
 EIGEN_DEBUG_UNALIGNED_STORE
-#if defined(__VSX__) || !defined(_BIG_ENDIAN)
+#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN)
 vec_xst(from, 0, to);
 #else
 // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -1773,7 +1773,7 @@ template<> EIGEN_STRONG_INLINE Packet8bf pceil<Packet8bf> (const Packet8bf& a){
 template<> EIGEN_STRONG_INLINE Packet8bf pround<Packet8bf> (const Packet8bf& a){
 BF16_TO_F32_UNARY_OP_WRAPPER(pround<Packet4f>, a);
 }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet8bf print<Packet8bf> (const Packet8bf& a){
 BF16_TO_F32_UNARY_OP_WRAPPER(print<Packet4f>, a);
 }
@@ -2657,7 +2657,7 @@ template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f,Packet4i>(const Pa
 //---------- double ----------
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 typedef __vector double Packet2d;
 typedef __vector unsigned long long Packet2ul;
 typedef __vector long long Packet2l;
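
Most of the hunks in this file come down to one hardware fact: vec_xl/vec_xst and the xv* mnemonics used in the asm blocks (xvcmpgesp, xxsel, xvrspiz) belong to VSX, which entered the ISA at v2.06, while classic AltiVec only offers the 16-byte-aligned vec_ld/vec_st and the vec_min/vec_max style intrinsics the #else branches rely on. A hedged sketch of the aligned-load split the guards select, illustrative only and not the Eigen implementation:

#include <altivec.h>

// Aligned 4 x float load: VSX builds may use vec_xl, while AltiVec-only
// builds (for example Darwin/PowerPC) must use vec_ld, which requires the
// address to be 16-byte aligned.
static inline __vector float load4f_aligned(const float* from) {
#ifdef EIGEN_VECTORIZE_VSX
  return vec_xl(0, const_cast<float*>(from));
#else
  return vec_ld(0, from);
#endif
}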

File 5 of 6

@@ -352,10 +352,10 @@
 #endif
 } // end extern "C"
-#elif defined __VSX__
+#elif defined(__VSX__) && !defined(__APPLE__)
 #define EIGEN_VECTORIZE
-#define EIGEN_VECTORIZE_VSX
+#define EIGEN_VECTORIZE_VSX 1
 #include <altivec.h>
 // We need to #undef all these ugly tokens defined in <altivec.h>
 // => use __vector instead of vector
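
This is the hunk the rest of the series depends on: EIGEN_VECTORIZE_VSX is now defined only when the compiler advertises __VSX__ and the target is not Apple, so every guard rewritten above automatically excludes Darwin. A small diagnostic sketch of how the resulting configuration can be inspected from user code; the program is hypothetical, and it assumes Eigen's companion macro EIGEN_VECTORIZE_ALTIVEC marks the plain AltiVec path:

#include <Eigen/Core>
#include <cstdio>

// Print which PowerPC code path this translation unit was configured with.
int main() {
#if defined(EIGEN_VECTORIZE_VSX)
  std::printf("Eigen: VSX paths enabled\n");
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
  std::printf("Eigen: AltiVec only (the Darwin/PowerPC case)\n");
#else
  std::printf("Eigen: no PowerPC vectorization\n");
#endif
  return 0;
}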

File 6 of 6

@@ -371,7 +371,7 @@
 #endif
 /// \internal EIGEN_ARCH_PPC set to 1 if the architecture is PowerPC
-#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC)
+#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC) || defined(__POWERPC__)
 #define EIGEN_ARCH_PPC 1
 #else
 #define EIGEN_ARCH_PPC 0
@@ -979,8 +979,13 @@ namespace Eigen {
 // This seems to be broken on clang. Packet4f is loaded into a single
 // register rather than a vector, zeroing out some entries. Integer
 // types also generate a compile error.
-// General, Altivec, VSX.
+#if EIGEN_OS_MAC
+// General, Altivec for Apple (VSX were added in ISA v2.06):
+#define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v" (X));
+#else
+// General, Altivec, VSX otherwise:
 #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v,wa" (X));
+#endif
 #elif EIGEN_ARCH_ARM_OR_ARM64
 #ifdef __ARM_FP
 // General, VFP or NEON.
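
These last hunks address two Darwin specifics: Apple PowerPC toolchains can predefine __POWERPC__ rather than the spellings EIGEN_ARCH_PPC previously checked, and the "wa" operand constraint in the optimization barrier names a VSX register class that does not exist on Apple targets, so the barrier is split on EIGEN_OS_MAC. A standalone illustration of the two barrier flavours; the macro and function names are hypothetical, the constraint strings are the ones from the diff, and the sketch assumes a PowerPC target with AltiVec enabled:

#include <altivec.h>

// "+r,v"    : the value may be kept in a general-purpose or AltiVec register.
// "+r,v,wa" : additionally allows any VSX register, which only exists on
//             VSX-capable targets and therefore never on Darwin/PowerPC.
#if defined(__APPLE__) && defined(__POWERPC__)
#  define MY_OPT_BARRIER(X) __asm__("" : "+r,v"(X));
#else
#  define MY_OPT_BARRIER(X) __asm__("" : "+r,v,wa"(X));
#endif

// Usage sketch: force the intermediate sum to be materialized in a register
// instead of being reassociated or folded away by the optimizer.
static inline __vector float add_twice(__vector float a, __vector float b) {
  __vector float t = vec_add(a, b);
  MY_OPT_BARRIER(t)
  return vec_add(t, b);
}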