// Patch summary (descriptive preamble only; the diff text below is unchanged).
//
// Files touched: Eigen/src/Core/arch/AVX/PacketMath.h and
// Eigen/src/Core/arch/SSE/PacketMath.h.
//
// AVX hunk:
//   * Adds two avx_blend_mask overloads returning __m256i:
//       - Selector<4>: _mm256_set_epi64x over (0 - ifPacket.select[i]), i = 3..0.
//       - Selector<8>: _mm256_set_epi32  over (0 - ifPacket.select[i]), i = 7..0.
//   * pblend for Packet8f and Packet4d now cast avx_blend_mask(ifPacket) with
//     _mm256_castsi256_ps / _mm256_castsi256_pd and forward the result to pselect.
//   * Removes the #ifdef EIGEN_VECTORIZE_AVX2 / #else split from both functions.
//     The removed AVX2 branch subtracted the selector lanes from zero with
//     _mm256_sub_epi32 / _mm256_sub_epi64; the removed #else branch loaded the
//     selector as floating point and compared it against zero with _CMP_NEQ_UQ.
//
// SSE hunks:
//   * Adds two sse_blend_mask overloads returning __m128i:
//       - Selector<2>: _mm_set_epi64x over (0 - ifPacket.select[i]), i = 1..0.
//       - Selector<4>: _mm_set_epi32  over (0 - ifPacket.select[i]), i = 3..0.
//   * pblend for Packet2l, Packet4i, Packet4f and Packet2d replace their inline
//     "_mm_set_*" + "_mm_sub_*(_mm_setzero_si128(), select)" sequence with a call
//     to sse_blend_mask(ifPacket); the float/double variants still cast the mask
//     with _mm_castsi128_ps / _mm_castsi128_pd before calling pselect.
//   * The Packet4ui pblend appears only as hunk-header context; no change to it
//     is visible in this patch.
//
// NOTE(review): the helpers rely on each ifPacket.select[i] being 0 or 1 so that
// (0 - select[i]) yields an all-zero or all-ones lane -- presumably guaranteed by
// the Selector definition, which is not part of this patch; confirm.
// NOTE(review): this copy of the patch appears to have lost its original line
// breaks (many diff lines share one physical line) -- restore them before
// feeding it to a patch tool.
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 6df332470..dac43fcd8 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -2134,35 +2134,28 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49); } +EIGEN_STRONG_INLINE __m256i avx_blend_mask(const Selector<4>& ifPacket) { + return _mm256_set_epi64x(0 - ifPacket.select[3], 0 - ifPacket.select[2], 0 - ifPacket.select[1], + 0 - ifPacket.select[0]); +} + +EIGEN_STRONG_INLINE __m256i avx_blend_mask(const Selector<8>& ifPacket) { + return _mm256_set_epi32(0 - ifPacket.select[7], 0 - ifPacket.select[6], 0 - ifPacket.select[5], + 0 - ifPacket.select[4], 0 - ifPacket.select[3], 0 - ifPacket.select[2], + 0 - ifPacket.select[1], 0 - ifPacket.select[0]); +} + template <> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) { -#ifdef EIGEN_VECTORIZE_AVX2 - const __m256i select = - _mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], - ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - const __m256 true_mask = _mm256_castsi256_ps(_mm256_sub_epi32(_mm256_setzero_si256(), select)); -#else - const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], - ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - const __m256 true_mask = _mm256_cmp_ps(select, _mm256_setzero_ps(), _CMP_NEQ_UQ); -#endif - + const __m256 true_mask = _mm256_castsi256_ps(avx_blend_mask(ifPacket)); return pselect(true_mask, thenPacket, elsePacket); } template <> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) { -#ifdef EIGEN_VECTORIZE_AVX2 - const __m256i select = - _mm256_set_epi64x(ifPacket.select[3], 
ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - const __m256d true_mask = _mm256_castsi256_pd(_mm256_sub_epi64(_mm256_setzero_si256(), select)); -#else - const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - __m256d true_mask = _mm256_cmp_pd(select, _mm256_setzero_pd(), _CMP_NEQ_UQ); -#endif - + const __m256d true_mask = _mm256_castsi256_pd(avx_blend_mask(ifPacket)); return pselect(true_mask, thenPacket, elsePacket); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 7e6a1b9a2..7bac3f9c7 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -2232,18 +2232,24 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) { kernel.packet[15] = _mm_unpackhi_epi64(u7, uf); } +EIGEN_STRONG_INLINE __m128i sse_blend_mask(const Selector<2>& ifPacket) { + return _mm_set_epi64x(0 - ifPacket.select[1], 0 - ifPacket.select[0]); +} + +EIGEN_STRONG_INLINE __m128i sse_blend_mask(const Selector<4>& ifPacket) { + return _mm_set_epi32(0 - ifPacket.select[3], 0 - ifPacket.select[2], 0 - ifPacket.select[1], 0 - ifPacket.select[0]); +} + template <> EIGEN_STRONG_INLINE Packet2l pblend(const Selector<2>& ifPacket, const Packet2l& thenPacket, const Packet2l& elsePacket) { - const __m128i select = _mm_set_epi64x(ifPacket.select[1], ifPacket.select[0]); - const __m128i true_mask = _mm_sub_epi64(_mm_setzero_si128(), select); + const __m128i true_mask = sse_blend_mask(ifPacket); return pselect(true_mask, thenPacket, elsePacket); } template <> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) { - const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - const __m128i true_mask = _mm_sub_epi32(_mm_setzero_si128(), select); + const __m128i true_mask = sse_blend_mask(ifPacket); return 
pselect(true_mask, thenPacket, elsePacket); } template <> @@ -2254,15 +2260,13 @@ EIGEN_STRONG_INLINE Packet4ui pblend(const Selector<4>& ifPacket, const Packet4u template <> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) { - const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); - const __m128i true_mask = _mm_sub_epi32(_mm_setzero_si128(), select); + const __m128i true_mask = sse_blend_mask(ifPacket); return pselect(_mm_castsi128_ps(true_mask), thenPacket, elsePacket); } template <> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { - const __m128i select = _mm_set_epi64x(ifPacket.select[1], ifPacket.select[0]); - const __m128i true_mask = _mm_sub_epi64(_mm_setzero_si128(), select); + const __m128i true_mask = sse_blend_mask(ifPacket); return pselect(_mm_castsi128_pd(true_mask), thenPacket, elsePacket); }