Fix build for pblend, psin_double, and pcos_double when AVX is supported but AVX2 is not.

This commit is contained in:
Rasmus Munk Larsen 2024-04-16 16:12:41 +00:00
parent 888fca0e2b
commit b5feca5d03
2 changed files with 28 additions and 5 deletions

View File

@ -22,7 +22,15 @@ namespace Eigen {
namespace internal { namespace internal {
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f) EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet4d)
EIGEN_DOUBLE_PACKET_FUNCTION(atan, Packet4d);
EIGEN_DOUBLE_PACKET_FUNCTION(log, Packet4d);
EIGEN_DOUBLE_PACKET_FUNCTION(log2, Packet4d);
EIGEN_DOUBLE_PACKET_FUNCTION(exp, Packet4d);
#ifdef EIGEN_VECTORIZE_AVX2
EIGEN_DOUBLE_PACKET_FUNCTION(sin, Packet4d);
EIGEN_DOUBLE_PACKET_FUNCTION(cos, Packet4d);
#endif
// Notice that for newer processors, it is counterproductive to use Newton // Notice that for newer processors, it is counterproductive to use Newton
// iteration for square root. In particular, Skylake and Zen2 processors // iteration for square root. In particular, Skylake and Zen2 processors

View File

@ -142,8 +142,10 @@ struct packet_traits<double> : default_packet_traits {
HasCmp = 1, HasCmp = 1,
HasDiv = 1, HasDiv = 1,
#ifdef EIGEN_VECTORIZE_AVX2
HasSin = EIGEN_FAST_MATH, HasSin = EIGEN_FAST_MATH,
HasCos = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH,
#endif
HasLog = 1, HasLog = 1,
HasExp = 1, HasExp = 1,
HasSqrt = 1, HasSqrt = 1,
@ -2135,20 +2137,33 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4d, 4>& kernel) {
template <> template <>
EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket,
const Packet8f& elsePacket) { const Packet8f& elsePacket) {
#ifdef EIGEN_VECTORIZE_AVX2
const __m256i select = const __m256i select =
_mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], _mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4],
ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
const __m256i true_mask = _mm256_sub_epi32(_mm256_setzero_si256(), select); const __m256 true_mask = _mm256_castsi256_ps(_mm256_sub_epi32(_mm256_setzero_si256(), select));
return pselect<Packet8f>(_mm256_castsi256_ps(true_mask), thenPacket, elsePacket); #else
const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4],
ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
const __m256 true_mask = _mm256_cmp_ps(select, _mm256_setzero_ps(), _CMP_NEQ_UQ);
#endif
return pselect<Packet8f>(true_mask, thenPacket, elsePacket);
} }
template <> template <>
EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket,
const Packet4d& elsePacket) { const Packet4d& elsePacket) {
#ifdef EIGEN_VECTORIZE_AVX2
const __m256i select = const __m256i select =
_mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); _mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
const __m256i true_mask = _mm256_sub_epi64(_mm256_setzero_si256(), select); const __m256d true_mask = _mm256_castsi256_pd(_mm256_sub_epi64(_mm256_setzero_si256(), select));
return pselect<Packet4d>(_mm256_castsi256_pd(true_mask), thenPacket, elsePacket); #else
const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
__m256d true_mask = _mm256_cmp_pd(select, _mm256_setzero_pd(), _CMP_NEQ_UQ);
#endif
return pselect<Packet4d>(true_mask, thenPacket, elsePacket);
} }
// Packet math for Eigen::half // Packet math for Eigen::half