mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Clean up half packet traits and add a few more missing packet ops.
This commit is contained in:
parent
b013176e52
commit
8450a6d519
@ -30,6 +30,7 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
|
|||||||
size=2,
|
size=2,
|
||||||
HasHalfPacket = 0,
|
HasHalfPacket = 0,
|
||||||
HasAdd = 1,
|
HasAdd = 1,
|
||||||
|
HasSub = 1,
|
||||||
HasMul = 1,
|
HasMul = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
@ -572,6 +573,7 @@ struct packet_traits<half> : default_packet_traits {
|
|||||||
HasAdd = 1,
|
HasAdd = 1,
|
||||||
HasSub = 1,
|
HasSub = 1,
|
||||||
HasMul = 1,
|
HasMul = 1,
|
||||||
|
HasDiv = 1,
|
||||||
HasNegate = 1,
|
HasNegate = 1,
|
||||||
HasAbs = 0,
|
HasAbs = 0,
|
||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
@ -579,7 +581,6 @@ struct packet_traits<half> : default_packet_traits {
|
|||||||
HasMax = 0,
|
HasMax = 0,
|
||||||
HasConj = 0,
|
HasConj = 0,
|
||||||
HasSetLinear = 0,
|
HasSetLinear = 0,
|
||||||
HasDiv = 0,
|
|
||||||
HasSqrt = 0,
|
HasSqrt = 0,
|
||||||
HasRsqrt = 0,
|
HasRsqrt = 0,
|
||||||
HasExp = 0,
|
HasExp = 0,
|
||||||
@ -770,6 +771,13 @@ template<> EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, con
|
|||||||
return float2half(rf);
|
return float2half(rf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet16h pdiv<Packet16h>(const Packet16h& a, const Packet16h& b) {
|
||||||
|
Packet16f af = half2float(a);
|
||||||
|
Packet16f bf = half2float(b);
|
||||||
|
Packet16f rf = pdiv(af, bf);
|
||||||
|
return float2half(rf);
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& from) {
|
template<> EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& from) {
|
||||||
Packet16f from_float = half2float(from);
|
Packet16f from_float = half2float(from);
|
||||||
return half(predux(from_float));
|
return half(predux(from_float));
|
||||||
@ -1054,6 +1062,7 @@ struct packet_traits<Eigen::half> : default_packet_traits {
|
|||||||
HasAdd = 1,
|
HasAdd = 1,
|
||||||
HasSub = 1,
|
HasSub = 1,
|
||||||
HasMul = 1,
|
HasMul = 1,
|
||||||
|
HasDiv = 1,
|
||||||
HasNegate = 1,
|
HasNegate = 1,
|
||||||
HasAbs = 0,
|
HasAbs = 0,
|
||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
@ -1061,7 +1070,6 @@ struct packet_traits<Eigen::half> : default_packet_traits {
|
|||||||
HasMax = 0,
|
HasMax = 0,
|
||||||
HasConj = 0,
|
HasConj = 0,
|
||||||
HasSetLinear = 0,
|
HasSetLinear = 0,
|
||||||
HasDiv = 0,
|
|
||||||
HasSqrt = 0,
|
HasSqrt = 0,
|
||||||
HasRsqrt = 0,
|
HasRsqrt = 0,
|
||||||
HasExp = 0,
|
HasExp = 0,
|
||||||
@ -1221,6 +1229,13 @@ template<> EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const
|
|||||||
return float2half(rf);
|
return float2half(rf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
||||||
|
Packet8f af = half2float(a);
|
||||||
|
Packet8f bf = half2float(b);
|
||||||
|
Packet8f rf = pdiv(af, bf);
|
||||||
|
return float2half(rf);
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8h pgather<Eigen::half, Packet8h>(const Eigen::half* from, Index stride)
|
template<> EIGEN_STRONG_INLINE Packet8h pgather<Eigen::half, Packet8h>(const Eigen::half* from, Index stride)
|
||||||
{
|
{
|
||||||
Packet8h result;
|
Packet8h result;
|
||||||
@ -1407,9 +1422,10 @@ struct packet_traits<Eigen::half> : default_packet_traits {
|
|||||||
AlignedOnScalar = 1,
|
AlignedOnScalar = 1,
|
||||||
size = 4,
|
size = 4,
|
||||||
HasHalfPacket = 0,
|
HasHalfPacket = 0,
|
||||||
HasAdd = 0,
|
HasAdd = 1,
|
||||||
HasSub = 0,
|
HasSub = 1,
|
||||||
HasMul = 0,
|
HasMul = 1,
|
||||||
|
HasDiv = 1,
|
||||||
HasNegate = 0,
|
HasNegate = 0,
|
||||||
HasAbs = 0,
|
HasAbs = 0,
|
||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
@ -1417,7 +1433,6 @@ struct packet_traits<Eigen::half> : default_packet_traits {
|
|||||||
HasMax = 0,
|
HasMax = 0,
|
||||||
HasConj = 0,
|
HasConj = 0,
|
||||||
HasSetLinear = 0,
|
HasSetLinear = 0,
|
||||||
HasDiv = 0,
|
|
||||||
HasSqrt = 0,
|
HasSqrt = 0,
|
||||||
HasRsqrt = 0,
|
HasRsqrt = 0,
|
||||||
HasExp = 0,
|
HasExp = 0,
|
||||||
@ -1464,6 +1479,29 @@ template<> EIGEN_STRONG_INLINE Packet4h padd<Packet4h>(const Packet4h& a, const
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4h psub<Packet4h>(const Packet4h& a, const Packet4h& b) {
|
||||||
|
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
||||||
|
__int64_t b64 = _mm_cvtm64_si64(b.x);
|
||||||
|
|
||||||
|
Eigen::half h[4];
|
||||||
|
|
||||||
|
Eigen::half ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64));
|
||||||
|
Eigen::half hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64));
|
||||||
|
h[0] = ha - hb;
|
||||||
|
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
|
||||||
|
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));
|
||||||
|
h[1] = ha - hb;
|
||||||
|
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
|
||||||
|
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 32));
|
||||||
|
h[2] = ha - hb;
|
||||||
|
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
|
||||||
|
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 48));
|
||||||
|
h[3] = ha - hb;
|
||||||
|
Packet4h result;
|
||||||
|
result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4h pmul<Packet4h>(const Packet4h& a, const Packet4h& b) {
|
template<> EIGEN_STRONG_INLINE Packet4h pmul<Packet4h>(const Packet4h& a, const Packet4h& b) {
|
||||||
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
||||||
__int64_t b64 = _mm_cvtm64_si64(b.x);
|
__int64_t b64 = _mm_cvtm64_si64(b.x);
|
||||||
@ -1487,6 +1525,29 @@ template<> EIGEN_STRONG_INLINE Packet4h pmul<Packet4h>(const Packet4h& a, const
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4h pdiv<Packet4h>(const Packet4h& a, const Packet4h& b) {
|
||||||
|
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
||||||
|
__int64_t b64 = _mm_cvtm64_si64(b.x);
|
||||||
|
|
||||||
|
Eigen::half h[4];
|
||||||
|
|
||||||
|
Eigen::half ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64));
|
||||||
|
Eigen::half hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64));
|
||||||
|
h[0] = ha / hb;
|
||||||
|
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
|
||||||
|
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));
|
||||||
|
h[1] = ha / hb;
|
||||||
|
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
|
||||||
|
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 32));
|
||||||
|
h[2] = ha / hb;
|
||||||
|
ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
|
||||||
|
hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 48));
|
||||||
|
h[3] = ha / hb;
|
||||||
|
Packet4h result;
|
||||||
|
result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4h pload<Packet4h>(const Eigen::half* from) {
|
template<> EIGEN_STRONG_INLINE Packet4h pload<Packet4h>(const Eigen::half* from) {
|
||||||
Packet4h result;
|
Packet4h result;
|
||||||
result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
|
result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user