mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Add AVX int32_t pdiv
This commit is contained in:
parent
dceb779ecd
commit
ea84e7ad63
@ -212,6 +212,7 @@ template<> struct packet_traits<int> : default_packet_traits
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
HasCmp = 1,
|
||||
HasDiv = 1,
|
||||
size=8
|
||||
};
|
||||
};
|
||||
@ -545,9 +546,10 @@ template<> EIGEN_STRONG_INLINE Packet8i pmul<Packet8i>(const Packet8i& a, const
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)
|
||||
{ eigen_assert(false && "packet integer division are not supported by AVX");
|
||||
return pset1<Packet8i>(0);
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& a, const Packet8i& b) {
|
||||
Packet4i lo = pdiv(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));
|
||||
Packet4i hi = pdiv(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));
|
||||
return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
|
@ -368,6 +368,11 @@ template<> EIGEN_STRONG_INLINE Packet16b pmul<Packet16b>(const Packet16b& a, con
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
|
||||
#ifdef EIGEN_VECTORIZE_AVX
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) {
|
||||
return _mm256_cvttpd_epi32(_mm256_div_pd(_mm256_cvtepi32_pd(a), _mm256_cvtepi32_pd(b)));
|
||||
}
|
||||
#endif
|
||||
|
||||
// for some weird raisons, it has to be overloaded for packet of integers
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
|
||||
|
Loading…
x
Reference in New Issue
Block a user