mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-24 02:29:33 +08:00
Add psin/pcos on AVX512 -> almost for free, at last!
This commit is contained in:
parent
c68bd2fa7a
commit
fa87f9d876
@ -373,6 +373,19 @@ EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
psin<Packet16f>(const Packet16f& _x) {
|
||||
return psin_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
pcos<Packet16f>(const Packet16f& _x) {
|
||||
return pcos_float(_x);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -55,6 +55,8 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
size = 16,
|
||||
HasHalfPacket = 1,
|
||||
HasBlend = 0,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG
|
||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
||||
HasLog = 1,
|
||||
@ -99,6 +101,7 @@ template <>
|
||||
struct unpacket_traits<Packet16f> {
|
||||
typedef float type;
|
||||
typedef Packet8f half;
|
||||
typedef Packet16i integer_packet;
|
||||
enum { size = 16, alignment=Aligned64 };
|
||||
};
|
||||
template <>
|
||||
@ -127,6 +130,11 @@ EIGEN_STRONG_INLINE Packet16i pset1<Packet16i>(const int& from) {
|
||||
return _mm512_set1_epi32(from);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f pset1frombits<Packet16f>(unsigned int from) {
|
||||
return _mm512_castsi512_ps(_mm512_set1_epi32(from));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) {
|
||||
return _mm512_broadcastss_ps(_mm_load_ps1(from));
|
||||
@ -254,6 +262,12 @@ EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a,
|
||||
return _mm512_max_pd(b, a);
|
||||
}
|
||||
|
||||
// Lane-wise equality test on two Packet16i values.
// The 512-bit inputs are split into 256-bit halves, each half is compared
// with _mm256_cmpeq_epi32 (all-ones lane on equality, zero otherwise), and
// the two partial results are stitched back into a single 512-bit packet.
template<> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) {
  // Lower 256 bits of each operand.
  const __m256i a_low = _mm512_extracti64x4_epi64(a, 0);
  const __m256i b_low = _mm512_extracti64x4_epi64(b, 0);
  // Upper 256 bits of each operand.
  const __m256i a_high = _mm512_extracti64x4_epi64(a, 1);
  const __m256i b_high = _mm512_extracti64x4_epi64(b, 1);

  const __m256i eq_low = _mm256_cmpeq_epi32(a_low, b_low);
  const __m256i eq_high = _mm256_cmpeq_epi32(a_high, b_high);

  // The cast leaves the upper half unspecified; it is overwritten right away
  // by inserting the high comparison result at position 1.
  const Packet16i widened = _mm512_castsi256_si512(eq_low);
  return _mm512_inserti64x4(widened, eq_high, 1);
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f pand<Packet16f>(const Packet16f& a,
|
||||
const Packet16f& b) {
|
||||
@ -434,6 +448,10 @@ EIGEN_STRONG_INLINE Packet8d pandnot<Packet8d>(const Packet8d& a,
|
||||
#endif
|
||||
}
|
||||
|
||||
// Shifts every 32-bit lane of `a` left by the compile-time amount N,
// filling the vacated low bits with zeros.
template<int N> EIGEN_STRONG_INLINE Packet16i pshiftleft(Packet16i a) {
  const Packet16i shifted = _mm512_slli_epi32(a, N);
  return shifted;
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f pload<Packet16f>(const float* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ps(from);
|
||||
@ -1322,6 +1340,22 @@ template<> EIGEN_STRONG_INLINE Packet8d pinsertlast(const Packet8d& a, double b)
|
||||
return _mm512_mask_broadcastsd_pd(a, (1<<7), _mm_load_sd(&b));
|
||||
}
|
||||
|
||||
// Numeric conversion of sixteen floats to sixteen 32-bit integers.
// Uses the truncating conversion (_mm512_cvttps_epi32), i.e. rounding
// toward zero rather than the current rounding mode.
template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {
  const Packet16i truncated = _mm512_cvttps_epi32(a);
  return truncated;
}
|
||||
|
||||
// Numeric conversion of sixteen signed 32-bit integers to sixteen floats
// via _mm512_cvtepi32_ps.
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16i, Packet16f>(const Packet16i& a) {
  const Packet16f as_float = _mm512_cvtepi32_ps(a);
  return as_float;
}
|
||||
|
||||
// Bit-for-bit reinterpretation of a float packet as an integer packet.
// Unlike pcast, the lane contents are left untouched; only the type changes.
template<> EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i,Packet16f>(const Packet16f& a) {
  const Packet16i raw_bits = _mm512_castps_si512(a);
  return raw_bits;
}
|
||||
|
||||
// Bit-for-bit reinterpretation of an integer packet as a float packet.
// Unlike pcast, the lane contents are left untouched; only the type changes.
template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f,Packet16i>(const Packet16i& a) {
  const Packet16f as_float_bits = _mm512_castsi512_ps(a);
  return as_float_bits;
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
Loading…
x
Reference in New Issue
Block a user