Add AVX int32_t pdiv

This commit is contained in:
Charles Schlosser 2022-09-16 17:06:29 +00:00 committed by Rasmus Munk Larsen
parent dceb779ecd
commit ea84e7ad63
2 changed files with 10 additions and 3 deletions

View File

@ -212,6 +212,7 @@ template<> struct packet_traits<int> : default_packet_traits
Vectorizable = 1,
AlignedOnScalar = 1,
HasCmp = 1,
HasDiv = 1,
size=8
};
};
@ -545,9 +546,10 @@ template<> EIGEN_STRONG_INLINE Packet8i pmul<Packet8i>(const Packet8i& a, const
// Element-wise division of two single-precision AVX packets, delegated to the
// `_mm256_div_ps` intrinsic.
template<>
EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  const Packet8f quotient = _mm256_div_ps(a, b);
  return quotient;
}
// Element-wise division of two double-precision AVX packets, delegated to the
// `_mm256_div_pd` intrinsic.
template<>
EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  const Packet4d quotient = _mm256_div_pd(a, b);
  return quotient;
}
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)
{ eigen_assert(false && "packet integer division are not supported by AVX");
return pset1<Packet8i>(0);
// Integer division of two 256-bit integer packets.
//
// The 256-bit operands are split into their low and high 128-bit halves, each
// half is divided with the Packet4i implementation of pdiv, and the two
// 128-bit quotients are stitched back together into one 256-bit result.
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& a, const Packet8i& b) {
  // The packet type is named explicitly: `_mm256_extractf128_si256` yields a raw
  // `__m128i`, so relying on template argument deduction would instantiate
  // pdiv for `__m128i` and only reach the Packet4i specialization if Packet4i
  // happens to be the very same type.
  Packet4i lo = pdiv<Packet4i>(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));
  Packet4i hi = pdiv<Packet4i>(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));
  // `lo` becomes the lower half via the cast; `hi` is inserted as the upper half.
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
}
#ifdef EIGEN_VECTORIZE_FMA

View File

@ -368,6 +368,11 @@ template<> EIGEN_STRONG_INLINE Packet16b pmul<Packet16b>(const Packet16b& a, con
// Element-wise division of two single-precision SSE packets, delegated to the
// `_mm_div_ps` intrinsic.
template<>
EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  const Packet4f quotient = _mm_div_ps(a, b);
  return quotient;
}
// Element-wise division of two double-precision SSE packets, delegated to the
// `_mm_div_pd` intrinsic.
template<>
EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b)
{
  const Packet2d quotient = _mm_div_pd(a, b);
  return quotient;
}
#ifdef EIGEN_VECTORIZE_AVX
// Division of two packets of 32-bit integers, routed through double precision:
// both operands are converted to packed doubles, divided, and the quotient is
// converted back to 32-bit integers with truncation (`_mm256_cvttpd_epi32`).
template<>
EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b)
{
  const __m256d numerator   = _mm256_cvtepi32_pd(a);
  const __m256d denominator = _mm256_cvtepi32_pd(b);
  const __m256d quotient    = _mm256_div_pd(numerator, denominator);
  return _mm256_cvttpd_epi32(quotient);
}
#endif
// For some unclear reason, pmadd has to be overloaded explicitly for packets of
// integers. Computes a * b + c as a pmul followed by a padd.
template<>
EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c)
{
  const Packet4i product = pmul(a, b);
  return padd(product, c);
}