mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-01 00:04:14 +08:00
more pblend optimizations
This commit is contained in:
parent
f0795d35e3
commit
5635d37f46
@ -2134,35 +2134,28 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4d, 4>& kernel) {
|
||||
kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
|
||||
}
|
||||
|
||||
// Builds a 256-bit integer mask with four 64-bit lanes from a 4-entry selector.
// Lane i holds `0 - select[i]`: zero when select[i] is 0 and all bits set when it is 1.
// NOTE(review): assumes every selector entry is 0 or 1 (presumably bool) — confirm
// against the definition of Selector.
EIGEN_STRONG_INLINE __m256i avx_blend_mask(const Selector<4>& ifPacket) {
  const auto& sel = ifPacket.select;
  // _mm256_set_epi64x lists its arguments from the highest lane down to lane 0.
  return _mm256_set_epi64x(0 - sel[3], 0 - sel[2], 0 - sel[1], 0 - sel[0]);
}
|
||||
|
||||
// Builds a 256-bit integer mask with eight 32-bit lanes from an 8-entry selector.
// Lane i holds `0 - select[i]`: zero when select[i] is 0 and all bits set when it is 1.
// NOTE(review): assumes every selector entry is 0 or 1 (presumably bool) — confirm
// against the definition of Selector.
EIGEN_STRONG_INLINE __m256i avx_blend_mask(const Selector<8>& ifPacket) {
  const auto& sel = ifPacket.select;
  // _mm256_set_epi32 lists its arguments from the highest lane down to lane 0.
  return _mm256_set_epi32(0 - sel[7], 0 - sel[6], 0 - sel[5], 0 - sel[4],
                          0 - sel[3], 0 - sel[2], 0 - sel[1], 0 - sel[0]);
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket,
|
||||
const Packet8f& elsePacket) {
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
const __m256i select =
|
||||
_mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4],
|
||||
ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||
const __m256 true_mask = _mm256_castsi256_ps(_mm256_sub_epi32(_mm256_setzero_si256(), select));
|
||||
#else
|
||||
const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4],
|
||||
ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||
const __m256 true_mask = _mm256_cmp_ps(select, _mm256_setzero_ps(), _CMP_NEQ_UQ);
|
||||
#endif
|
||||
|
||||
const __m256 true_mask = _mm256_castsi256_ps(avx_blend_mask(ifPacket));
|
||||
return pselect<Packet8f>(true_mask, thenPacket, elsePacket);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket,
|
||||
const Packet4d& elsePacket) {
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
const __m256i select =
|
||||
_mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||
const __m256d true_mask = _mm256_castsi256_pd(_mm256_sub_epi64(_mm256_setzero_si256(), select));
|
||||
#else
|
||||
const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||
__m256d true_mask = _mm256_cmp_pd(select, _mm256_setzero_pd(), _CMP_NEQ_UQ);
|
||||
#endif
|
||||
|
||||
const __m256d true_mask = _mm256_castsi256_pd(avx_blend_mask(ifPacket));
|
||||
return pselect<Packet4d>(true_mask, thenPacket, elsePacket);
|
||||
}
|
||||
|
||||
|
@ -2232,18 +2232,24 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet16b, 16>& kernel) {
|
||||
kernel.packet[15] = _mm_unpackhi_epi64(u7, uf);
|
||||
}
|
||||
|
||||
// Builds a 128-bit integer mask with two 64-bit lanes from a 2-entry selector.
// Lane i holds `0 - select[i]`: zero when select[i] is 0 and all bits set when it is 1.
// NOTE(review): assumes every selector entry is 0 or 1 (presumably bool) — confirm
// against the definition of Selector.
EIGEN_STRONG_INLINE __m128i sse_blend_mask(const Selector<2>& ifPacket) {
  const auto& sel = ifPacket.select;
  // _mm_set_epi64x takes the high lane first, then the low lane.
  return _mm_set_epi64x(0 - sel[1], 0 - sel[0]);
}
|
||||
|
||||
// Builds a 128-bit integer mask with four 32-bit lanes from a 4-entry selector.
// Lane i holds `0 - select[i]`: zero when select[i] is 0 and all bits set when it is 1.
// NOTE(review): assumes every selector entry is 0 or 1 (presumably bool) — confirm
// against the definition of Selector.
EIGEN_STRONG_INLINE __m128i sse_blend_mask(const Selector<4>& ifPacket) {
  const auto& sel = ifPacket.select;
  // _mm_set_epi32 lists its arguments from the highest lane down to lane 0.
  return _mm_set_epi32(0 - sel[3], 0 - sel[2], 0 - sel[1], 0 - sel[0]);
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pblend(const Selector<2>& ifPacket, const Packet2l& thenPacket,
|
||||
const Packet2l& elsePacket) {
|
||||
const __m128i select = _mm_set_epi64x(ifPacket.select[1], ifPacket.select[0]);
|
||||
const __m128i true_mask = _mm_sub_epi64(_mm_setzero_si128(), select);
|
||||
const __m128i true_mask = sse_blend_mask(ifPacket);
|
||||
return pselect<Packet2l>(true_mask, thenPacket, elsePacket);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
|
||||
const Packet4i& elsePacket) {
|
||||
const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||
const __m128i true_mask = _mm_sub_epi32(_mm_setzero_si128(), select);
|
||||
const __m128i true_mask = sse_blend_mask(ifPacket);
|
||||
return pselect<Packet4i>(true_mask, thenPacket, elsePacket);
|
||||
}
|
||||
template <>
|
||||
@ -2254,15 +2260,13 @@ EIGEN_STRONG_INLINE Packet4ui pblend(const Selector<4>& ifPacket, const Packet4u
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,
|
||||
const Packet4f& elsePacket) {
|
||||
const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||
const __m128i true_mask = _mm_sub_epi32(_mm_setzero_si128(), select);
|
||||
const __m128i true_mask = sse_blend_mask(ifPacket);
|
||||
return pselect<Packet4f>(_mm_castsi128_ps(true_mask), thenPacket, elsePacket);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket,
|
||||
const Packet2d& elsePacket) {
|
||||
const __m128i select = _mm_set_epi64x(ifPacket.select[1], ifPacket.select[0]);
|
||||
const __m128i true_mask = _mm_sub_epi64(_mm_setzero_si128(), select);
|
||||
const __m128i true_mask = sse_blend_mask(ifPacket);
|
||||
return pselect<Packet2d>(_mm_castsi128_pd(true_mask), thenPacket, elsePacket);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user