mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-22 22:33:15 +08:00
Implement ptranspose on altivec and fix pgather/pscatter
This commit is contained in:
parent
4eb92e5647
commit
8d85ce88e1
@ -21,6 +21,8 @@ static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);
|
|||||||
// Byte-selector constants used as the third argument of vec_perm.
// NOTE(review): in this diff view each unchanged line is shown twice (apparently
// pre-commit first, post-commit second); lines shown once are additions.
static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
|
static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
|
||||||
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||||
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
|
static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
|
||||||
|
// Selector for ptranspose(Kernel<Packet2cf>&): bytes 0-7 of the first vec_perm operand
// followed by bytes 0-7 of the second (selector values 16-31 address the second operand).
static Packet16uc p16uc_COMPLEX_TRANSPOSE_0 = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
|
||||||
|
// Companion selector: bytes 8-15 of the first operand followed by bytes 8-15 of the second.
static Packet16uc p16uc_COMPLEX_TRANSPOSE_1 = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
|
||||||
|
|
||||||
//---------- float ----------
|
//---------- float ----------
|
||||||
struct Packet2cf
|
struct Packet2cf
|
||||||
@ -52,7 +54,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
// Reverse traits for Packet2cf: scalar type is std::complex<float>, two scalars per packet.
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
|
// Second form of the same line in this diff view: additionally declares `half`,
// aliased to Packet2cf itself.
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; typedef Packet2cf half; };
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||||
{
|
{
|
||||||
@ -71,12 +73,12 @@ template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packe
|
|||||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||||
af[0] = from[0*stride];
|
af[0] = from[0*stride];
|
||||||
af[1] = from[1*stride];
|
af[1] = from[1*stride];
|
||||||
return Packet2cf(vec_ld(0, af));
|
return Packet2cf(vec_ld(0, (const float*)af));
|
||||||
}
|
}
|
||||||
// Scatter store: writes the two complex values held in `from` to to[0*stride] and
// to[1*stride], staging them through the local EIGEN_ALIGN16 buffer `af`.
// NOTE(review): in this diff view each line appears twice (apparently pre-commit
// first, post-commit second); only the vec_st line differs between the two.
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride)
|
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride)
|
||||||
{
|
{
|
||||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||||
// First form: hands the std::complex<float> array to vec_st directly.
vec_st(from.v, 0, af);
|
// Second form: same store, with the buffer explicitly cast to float* for vec_st.
vec_st(from.v, 0, (float*)af);
|
||||||
to[0*stride] = af[0];
|
to[0*stride] = af[0];
|
||||||
to[1*stride] = af[1];
|
to[1*stride] = af[1];
|
||||||
}
|
}
|
||||||
@ -227,6 +229,13 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x
|
|||||||
return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
|
return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// In-place transpose of the 2x2 block of std::complex<float> held in kernel.packet[0..1].
// With packet[0] = (a0, a1) and packet[1] = (b0, b1) on entry, the result is
// packet[0] = (a0, b0) and packet[1] = (a1, b1).
template<> EIGEN_STRONG_INLINE void ptranspose(Kernel<Packet2cf>& kernel)
{
  // Snapshot both rows first so the two permutes read the original data.
  const Packet4f row0 = kernel.packet[0].v;
  const Packet4f row1 = kernel.packet[1].v;

  // TRANSPOSE_0 picks the first 8 bytes of each operand, TRANSPOSE_1 the last 8.
  kernel.packet[0].v = vec_perm(row0, row1, p16uc_COMPLEX_TRANSPOSE_0);
  kernel.packet[1].v = vec_perm(row0, row1, p16uc_COMPLEX_TRANSPOSE_1);
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -146,6 +146,7 @@ inline std::ostream & operator <<(std::ostream & s, const Packetbi & v)
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
|
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
|
||||||
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
|
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
|
||||||
float EIGEN_ALIGN16 af[4];
|
float EIGEN_ALIGN16 af[4];
|
||||||
@ -533,6 +534,32 @@ struct palign_impl<Offset,Packet4i>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// In-place transpose of the 4x4 block of floats held in kernel.packet[0..3],
// done with two rounds of vec_mergeh / vec_mergel interleaving.
// Below, rows are written a = packet[0], b = packet[1], c = packet[2], d = packet[3].
template<> EIGEN_DEVICE_FUNC inline void
ptranspose(Kernel<Packet4f>& kernel) {
  // Round 1: interleave rows 0/2 and rows 1/3.
  const Packet4f ac_hi = vec_mergeh(kernel.packet[0], kernel.packet[2]);  // a0 c0 a1 c1
  const Packet4f ac_lo = vec_mergel(kernel.packet[0], kernel.packet[2]);  // a2 c2 a3 c3
  const Packet4f bd_hi = vec_mergeh(kernel.packet[1], kernel.packet[3]);  // b0 d0 b1 d1
  const Packet4f bd_lo = vec_mergel(kernel.packet[1], kernel.packet[3]);  // b2 d2 b3 d3

  // Round 2: interleave the intermediates to obtain the columns.
  kernel.packet[0] = vec_mergeh(ac_hi, bd_hi);  // a0 b0 c0 d0
  kernel.packet[1] = vec_mergel(ac_hi, bd_hi);  // a1 b1 c1 d1
  kernel.packet[2] = vec_mergeh(ac_lo, bd_lo);  // a2 b2 c2 d2
  kernel.packet[3] = vec_mergel(ac_lo, bd_lo);  // a3 b3 c3 d3
}
|
||||||
|
|
||||||
|
// In-place transpose of the 4x4 block of ints held in kernel.packet[0..3],
// done with two rounds of vec_mergeh / vec_mergel interleaving.
// Below, rows are written a = packet[0], b = packet[1], c = packet[2], d = packet[3].
template<> EIGEN_DEVICE_FUNC inline void
ptranspose(Kernel<Packet4i>& kernel) {
  // Round 1: interleave rows 0/2 and rows 1/3.
  const Packet4i ac_hi = vec_mergeh(kernel.packet[0], kernel.packet[2]);  // a0 c0 a1 c1
  const Packet4i ac_lo = vec_mergel(kernel.packet[0], kernel.packet[2]);  // a2 c2 a3 c3
  const Packet4i bd_hi = vec_mergeh(kernel.packet[1], kernel.packet[3]);  // b0 d0 b1 d1
  const Packet4i bd_lo = vec_mergel(kernel.packet[1], kernel.packet[3]);  // b2 d2 b3 d3

  // Round 2: interleave the intermediates to obtain the columns.
  kernel.packet[0] = vec_mergeh(ac_hi, bd_hi);  // a0 b0 c0 d0
  kernel.packet[1] = vec_mergel(ac_hi, bd_hi);  // a1 b1 c1 d1
  kernel.packet[2] = vec_mergeh(ac_lo, bd_lo);  // a2 b2 c2 d2
  kernel.packet[3] = vec_mergel(ac_lo, bd_lo);  // a3 b3 c3 d3
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
Loading…
x
Reference in New Issue
Block a user