mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-30 18:25:11 +08:00
Fix build for pblend, psin_double, and pcos_double when AVX is supported but AVX2 is not.
This commit is contained in:
parent
888fca0e2b
commit
b5feca5d03
@ -22,7 +22,15 @@ namespace Eigen {
|
|||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f)
|
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f)
|
||||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet4d)
|
|
||||||
|
EIGEN_DOUBLE_PACKET_FUNCTION(atan, Packet4d);
|
||||||
|
EIGEN_DOUBLE_PACKET_FUNCTION(log, Packet4d);
|
||||||
|
EIGEN_DOUBLE_PACKET_FUNCTION(log2, Packet4d);
|
||||||
|
EIGEN_DOUBLE_PACKET_FUNCTION(exp, Packet4d);
|
||||||
|
#ifdef EIGEN_VECTORIZE_AVX2
|
||||||
|
EIGEN_DOUBLE_PACKET_FUNCTION(sin, Packet4d);
|
||||||
|
EIGEN_DOUBLE_PACKET_FUNCTION(cos, Packet4d);
|
||||||
|
#endif
|
||||||
|
|
||||||
// Notice that for newer processors, it is counterproductive to use Newton
|
// Notice that for newer processors, it is counterproductive to use Newton
|
||||||
// iteration for square root. In particular, Skylake and Zen2 processors
|
// iteration for square root. In particular, Skylake and Zen2 processors
|
||||||
|
@ -142,8 +142,10 @@ struct packet_traits<double> : default_packet_traits {
|
|||||||
|
|
||||||
HasCmp = 1,
|
HasCmp = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
|
#ifdef EIGEN_VECTORIZE_AVX2
|
||||||
HasSin = EIGEN_FAST_MATH,
|
HasSin = EIGEN_FAST_MATH,
|
||||||
HasCos = EIGEN_FAST_MATH,
|
HasCos = EIGEN_FAST_MATH,
|
||||||
|
#endif
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
@ -2135,20 +2137,33 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4d, 4>& kernel) {
|
|||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket,
|
EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket,
|
||||||
const Packet8f& elsePacket) {
|
const Packet8f& elsePacket) {
|
||||||
|
#ifdef EIGEN_VECTORIZE_AVX2
|
||||||
const __m256i select =
|
const __m256i select =
|
||||||
_mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4],
|
_mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4],
|
||||||
ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||||
const __m256i true_mask = _mm256_sub_epi32(_mm256_setzero_si256(), select);
|
const __m256 true_mask = _mm256_castsi256_ps(_mm256_sub_epi32(_mm256_setzero_si256(), select));
|
||||||
return pselect<Packet8f>(_mm256_castsi256_ps(true_mask), thenPacket, elsePacket);
|
#else
|
||||||
|
const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4],
|
||||||
|
ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||||
|
const __m256 true_mask = _mm256_cmp_ps(select, _mm256_setzero_ps(), _CMP_NEQ_UQ);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return pselect<Packet8f>(true_mask, thenPacket, elsePacket);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket,
|
EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket,
|
||||||
const Packet4d& elsePacket) {
|
const Packet4d& elsePacket) {
|
||||||
|
#ifdef EIGEN_VECTORIZE_AVX2
|
||||||
const __m256i select =
|
const __m256i select =
|
||||||
_mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
_mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||||
const __m256i true_mask = _mm256_sub_epi64(_mm256_setzero_si256(), select);
|
const __m256d true_mask = _mm256_castsi256_pd(_mm256_sub_epi64(_mm256_setzero_si256(), select));
|
||||||
return pselect<Packet4d>(_mm256_castsi256_pd(true_mask), thenPacket, elsePacket);
|
#else
|
||||||
|
const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||||
|
__m256d true_mask = _mm256_cmp_pd(select, _mm256_setzero_pd(), _CMP_NEQ_UQ);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return pselect<Packet4d>(true_mask, thenPacket, elsePacket);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Packet math for Eigen::half
|
// Packet math for Eigen::half
|
||||||
|
Loading…
x
Reference in New Issue
Block a user