mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-15 21:26:00 +08:00
Implemented palign_impl for AVX512
This commit is contained in:
parent
ef3ac9d05a
commit
fa5a8f055a
@ -54,7 +54,6 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
AlignedOnScalar = 1,
|
AlignedOnScalar = 1,
|
||||||
size = 16,
|
size = 16,
|
||||||
HasHalfPacket = 1,
|
HasHalfPacket = 1,
|
||||||
HasDiv = 1,
|
|
||||||
#if EIGEN_GNUC_AT_LEAST(5, 3)
|
#if EIGEN_GNUC_AT_LEAST(5, 3)
|
||||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
@ -63,6 +62,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
HasRsqrt = 1,
|
HasRsqrt = 1,
|
||||||
#endif
|
#endif
|
||||||
|
HasDiv = 1
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
template<> struct packet_traits<double> : default_packet_traits
|
template<> struct packet_traits<double> : default_packet_traits
|
||||||
@ -997,9 +997,26 @@ EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
|
|||||||
|
|
||||||
template <int Offset>
|
template <int Offset>
|
||||||
struct palign_impl<Offset, Packet16f> {
|
struct palign_impl<Offset, Packet16f> {
|
||||||
static EIGEN_STRONG_INLINE void run(Packet16f& first, const Packet16f& second) {
|
static EIGEN_STRONG_INLINE void run(Packet16f& first,
|
||||||
|
const Packet16f& second) {
|
||||||
if (Offset != 0) {
|
if (Offset != 0) {
|
||||||
assert(false && "To be implemented");
|
__m512i first_idx = _mm512_set_epi32(
|
||||||
|
Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
|
||||||
|
Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
|
||||||
|
Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
|
||||||
|
|
||||||
|
__m512i second_idx =
|
||||||
|
_mm512_set_epi32(Offset - 1, Offset - 2, Offset - 3, Offset - 4,
|
||||||
|
Offset - 5, Offset - 6, Offset - 7, Offset - 8,
|
||||||
|
Offset - 9, Offset - 10, Offset - 11, Offset - 12,
|
||||||
|
Offset - 13, Offset - 14, Offset - 15, Offset - 16);
|
||||||
|
|
||||||
|
unsigned short mask = 0xFFFF;
|
||||||
|
mask <<= (16 - Offset);
|
||||||
|
|
||||||
|
first = _mm512_permutexvar_ps(first_idx, first);
|
||||||
|
Packet16f tmp = _mm512_permutexvar_ps(second_idx, second);
|
||||||
|
first = _mm512_mask_blend_ps(mask, first, tmp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -1007,7 +1024,20 @@ template <int Offset>
|
|||||||
struct palign_impl<Offset, Packet8d> {
|
struct palign_impl<Offset, Packet8d> {
|
||||||
static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
|
static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
|
||||||
if (Offset != 0) {
|
if (Offset != 0) {
|
||||||
assert(false && "To be implemented");
|
__m512i first_idx = _mm512_set_epi32(
|
||||||
|
0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
|
||||||
|
Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
|
||||||
|
|
||||||
|
__m512i second_idx = _mm512_set_epi32(
|
||||||
|
0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
|
||||||
|
Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);
|
||||||
|
|
||||||
|
unsigned char mask = 0xFF;
|
||||||
|
mask <<= (8 - Offset);
|
||||||
|
|
||||||
|
first = _mm512_permutexvar_pd(first_idx, first);
|
||||||
|
Packet8d tmp = _mm512_permutexvar_pd(second_idx, second);
|
||||||
|
first = _mm512_mask_blend_pd(mask, first, tmp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user