mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Complete Packet8h implementation and test it in packetmath unit test
This commit is contained in:
parent
a8ab6060df
commit
f4d623ffa7
@ -493,6 +493,13 @@ template<> EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, con
|
|||||||
return float2half(rf);
|
return float2half(rf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet16h psub<Packet16h>(const Packet16h& a, const Packet16h& b) {
|
||||||
|
Packet16f af = half2float(a);
|
||||||
|
Packet16f bf = half2float(b);
|
||||||
|
Packet16f rf = psub(af, bf);
|
||||||
|
return float2half(rf);
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
|
template<> EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
|
||||||
Packet16f af = half2float(a);
|
Packet16f af = half2float(a);
|
||||||
Packet16f bf = half2float(b);
|
Packet16f bf = half2float(b);
|
||||||
@ -730,10 +737,10 @@ struct packet_traits<Eigen::half> : default_packet_traits {
|
|||||||
AlignedOnScalar = 1,
|
AlignedOnScalar = 1,
|
||||||
size = 8,
|
size = 8,
|
||||||
HasHalfPacket = 0,
|
HasHalfPacket = 0,
|
||||||
HasAdd = 0,
|
HasAdd = 1,
|
||||||
HasSub = 0,
|
HasSub = 1,
|
||||||
HasMul = 0,
|
HasMul = 1,
|
||||||
HasNegate = 0,
|
HasNegate = 1,
|
||||||
HasAbs = 0,
|
HasAbs = 0,
|
||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
HasMin = 0,
|
HasMin = 0,
|
||||||
@ -782,6 +789,17 @@ template<> EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const
|
|||||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(to), from.x);
|
_mm_storeu_si128(reinterpret_cast<__m128i*>(to), from.x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8h
|
||||||
|
ploaddup<Packet8h>(const Eigen::half* from) {
|
||||||
|
Packet8h result;
|
||||||
|
unsigned short a = from[0].x;
|
||||||
|
unsigned short b = from[1].x;
|
||||||
|
unsigned short c = from[2].x;
|
||||||
|
unsigned short d = from[3].x;
|
||||||
|
result.x = _mm_set_epi16(d, d, c, c, b, b, a, a);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8h
|
template<> EIGEN_STRONG_INLINE Packet8h
|
||||||
ploadquad<Packet8h>(const Eigen::half* from) {
|
ploadquad<Packet8h>(const Eigen::half* from) {
|
||||||
Packet8h result;
|
Packet8h result;
|
||||||
@ -835,6 +853,12 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
|
|||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8h pconj(const Packet8h& a) { return a; }
|
template<> EIGEN_STRONG_INLINE Packet8h pconj(const Packet8h& a) { return a; }
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8h pnegate(const Packet8h& a) {
|
||||||
|
Packet8f af = half2float(a);
|
||||||
|
Packet8f rf = pnegate(af);
|
||||||
|
return float2half(rf);
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
template<> EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
||||||
Packet8f af = half2float(a);
|
Packet8f af = half2float(a);
|
||||||
Packet8f bf = half2float(b);
|
Packet8f bf = half2float(b);
|
||||||
@ -842,6 +866,13 @@ template<> EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const
|
|||||||
return float2half(rf);
|
return float2half(rf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8h psub<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
||||||
|
Packet8f af = half2float(a);
|
||||||
|
Packet8f bf = half2float(b);
|
||||||
|
Packet8f rf = psub(af, bf);
|
||||||
|
return float2half(rf);
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
template<> EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
||||||
Packet8f af = half2float(a);
|
Packet8f af = half2float(a);
|
||||||
Packet8f bf = half2float(b);
|
Packet8f bf = half2float(b);
|
||||||
@ -894,6 +925,52 @@ template<> EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8h>(const Packet8h&
|
|||||||
return Eigen::half(reduced);
|
return Eigen::half(reduced);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8h preduxp<Packet8h>(const Packet8h* p) {
|
||||||
|
Packet8f pf[8];
|
||||||
|
pf[0] = half2float(p[0]);
|
||||||
|
pf[1] = half2float(p[1]);
|
||||||
|
pf[2] = half2float(p[2]);
|
||||||
|
pf[3] = half2float(p[3]);
|
||||||
|
pf[4] = half2float(p[4]);
|
||||||
|
pf[5] = half2float(p[5]);
|
||||||
|
pf[6] = half2float(p[6]);
|
||||||
|
pf[7] = half2float(p[7]);
|
||||||
|
Packet8f reduced = preduxp<Packet8f>(pf);
|
||||||
|
return float2half(reduced);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8h preverse(const Packet8h& a)
|
||||||
|
{
|
||||||
|
__m128i m = _mm_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);
|
||||||
|
Packet8h res;
|
||||||
|
res.x = _mm_shuffle_epi8(a.x,m);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8h pinsertfirst(const Packet8h& a, Eigen::half b)
|
||||||
|
{
|
||||||
|
Packet8h res;
|
||||||
|
res.x = _mm_insert_epi16(a.x,int(b.x),0);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8h pinsertlast(const Packet8h& a, Eigen::half b)
|
||||||
|
{
|
||||||
|
Packet8h res;
|
||||||
|
res.x = _mm_insert_epi16(a.x,int(b.x),15);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int Offset>
|
||||||
|
struct palign_impl<Offset,Packet8h>
|
||||||
|
{
|
||||||
|
static EIGEN_STRONG_INLINE void run(Packet8h& first, const Packet8h& second)
|
||||||
|
{
|
||||||
|
if (Offset!=0)
|
||||||
|
first.x = _mm_alignr_epi8(second.x,first.x, Offset*2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void
|
EIGEN_STRONG_INLINE void
|
||||||
ptranspose(PacketBlock<Packet8h,8>& kernel) {
|
ptranspose(PacketBlock<Packet8h,8>& kernel) {
|
||||||
__m128i a = kernel.packet[0].x;
|
__m128i a = kernel.packet[0].x;
|
||||||
|
@ -123,7 +123,7 @@ template<typename Scalar> void packetmath()
|
|||||||
EIGEN_ALIGN_MAX Scalar data2[size];
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
||||||
EIGEN_ALIGN_MAX Packet packets[PacketSize*2];
|
EIGEN_ALIGN_MAX Packet packets[PacketSize*2];
|
||||||
EIGEN_ALIGN_MAX Scalar ref[size];
|
EIGEN_ALIGN_MAX Scalar ref[size];
|
||||||
RealScalar refvalue = 0;
|
RealScalar refvalue = RealScalar(0);
|
||||||
for (int i=0; i<size; ++i)
|
for (int i=0; i<size; ++i)
|
||||||
{
|
{
|
||||||
data1[i] = internal::random<Scalar>()/RealScalar(PacketSize);
|
data1[i] = internal::random<Scalar>()/RealScalar(PacketSize);
|
||||||
@ -178,7 +178,8 @@ template<typename Scalar> void packetmath()
|
|||||||
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub);
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub);
|
||||||
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul);
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul);
|
||||||
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasNegate);
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasNegate);
|
||||||
VERIFY((internal::is_same<Scalar,int>::value) || (!PacketTraits::Vectorizable) || PacketTraits::HasDiv);
|
// Disabled as it is not clear why it would be mandatory to support division.
|
||||||
|
//VERIFY((internal::is_same<Scalar,int>::value) || (!PacketTraits::Vectorizable) || PacketTraits::HasDiv);
|
||||||
|
|
||||||
CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD, internal::padd);
|
CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD, internal::padd);
|
||||||
CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB, internal::psub);
|
CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB, internal::psub);
|
||||||
@ -242,29 +243,30 @@ template<typename Scalar> void packetmath()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ref[0] = 0;
|
ref[0] = Scalar(0);
|
||||||
for (int i=0; i<PacketSize; ++i)
|
for (int i=0; i<PacketSize; ++i)
|
||||||
ref[0] += data1[i];
|
ref[0] += data1[i];
|
||||||
VERIFY(isApproxAbs(ref[0], internal::predux(internal::pload<Packet>(data1)), refvalue) && "internal::predux");
|
VERIFY(isApproxAbs(ref[0], internal::predux(internal::pload<Packet>(data1)), refvalue) && "internal::predux");
|
||||||
|
|
||||||
|
if(PacketSize==8 && internal::unpacket_traits<typename internal::unpacket_traits<Packet>::half>::size ==4) // so far, predux_half_dowto4 is only required in such a case
|
||||||
{
|
{
|
||||||
int HalfPacketSize = PacketSize>4 ? PacketSize/2 : PacketSize;
|
int HalfPacketSize = PacketSize>4 ? PacketSize/2 : PacketSize;
|
||||||
for (int i=0; i<HalfPacketSize; ++i)
|
for (int i=0; i<HalfPacketSize; ++i)
|
||||||
ref[i] = 0;
|
ref[i] = Scalar(0);
|
||||||
for (int i=0; i<PacketSize; ++i)
|
for (int i=0; i<PacketSize; ++i)
|
||||||
ref[i%HalfPacketSize] += data1[i];
|
ref[i%HalfPacketSize] += data1[i];
|
||||||
internal::pstore(data2, internal::predux_half_dowto4(internal::pload<Packet>(data1)));
|
internal::pstore(data2, internal::predux_half_dowto4(internal::pload<Packet>(data1)));
|
||||||
VERIFY(areApprox(ref, data2, HalfPacketSize) && "internal::predux_half_dowto4");
|
VERIFY(areApprox(ref, data2, HalfPacketSize) && "internal::predux_half_dowto4");
|
||||||
}
|
}
|
||||||
|
|
||||||
ref[0] = 1;
|
ref[0] = Scalar(1);
|
||||||
for (int i=0; i<PacketSize; ++i)
|
for (int i=0; i<PacketSize; ++i)
|
||||||
ref[0] *= data1[i];
|
ref[0] *= data1[i];
|
||||||
VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");
|
VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");
|
||||||
|
|
||||||
for (int j=0; j<PacketSize; ++j)
|
for (int j=0; j<PacketSize; ++j)
|
||||||
{
|
{
|
||||||
ref[j] = 0;
|
ref[j] = Scalar(0);
|
||||||
for (int i=0; i<PacketSize; ++i)
|
for (int i=0; i<PacketSize; ++i)
|
||||||
ref[j] += data1[i+j*PacketSize];
|
ref[j] += data1[i+j*PacketSize];
|
||||||
packets[j] = internal::pload<Packet>(data1+j*PacketSize);
|
packets[j] = internal::pload<Packet>(data1+j*PacketSize);
|
||||||
@ -630,6 +632,7 @@ void test_packetmath()
|
|||||||
CALL_SUBTEST_3( packetmath<int>() );
|
CALL_SUBTEST_3( packetmath<int>() );
|
||||||
CALL_SUBTEST_4( packetmath<std::complex<float> >() );
|
CALL_SUBTEST_4( packetmath<std::complex<float> >() );
|
||||||
CALL_SUBTEST_5( packetmath<std::complex<double> >() );
|
CALL_SUBTEST_5( packetmath<std::complex<double> >() );
|
||||||
|
CALL_SUBTEST_6( packetmath<half>() );
|
||||||
|
|
||||||
CALL_SUBTEST_1( packetmath_notcomplex<float>() );
|
CALL_SUBTEST_1( packetmath_notcomplex<float>() );
|
||||||
CALL_SUBTEST_2( packetmath_notcomplex<double>() );
|
CALL_SUBTEST_2( packetmath_notcomplex<double>() );
|
||||||
|
Loading…
x
Reference in New Issue
Block a user