mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-09 18:29:03 +08:00
Add support for casting between double and int64_t for SSE and AVX2.
This commit is contained in:
parent
d883932586
commit
b86641a4c2
@ -270,9 +270,7 @@ struct packet_traits<uint32_t> : default_packet_traits {
|
||||
template <>
|
||||
struct packet_traits<int64_t> : default_packet_traits {
|
||||
typedef Packet4l type;
|
||||
// There is no half-size packet for current Packet4l.
|
||||
// TODO: support as SSE path.
|
||||
typedef Packet4l half;
|
||||
typedef Packet2l half;
|
||||
enum { Vectorizable = 1, AlignedOnScalar = 1, HasCmp = 1, size = 4 };
|
||||
};
|
||||
template <>
|
||||
@ -332,6 +330,7 @@ template <>
|
||||
struct unpacket_traits<Packet4d> {
|
||||
typedef double type;
|
||||
typedef Packet2d half;
|
||||
typedef Packet4l integer_packet;
|
||||
enum {
|
||||
size = 4,
|
||||
alignment = Aligned32,
|
||||
@ -368,7 +367,7 @@ struct unpacket_traits<Packet8ui> {
|
||||
template <>
|
||||
struct unpacket_traits<Packet4l> {
|
||||
typedef int64_t type;
|
||||
typedef Packet4l half;
|
||||
typedef Packet2l half;
|
||||
enum {
|
||||
size = 4,
|
||||
alignment = Aligned32,
|
||||
|
@ -47,6 +47,13 @@ template <>
|
||||
struct type_casting_traits<bfloat16, float> : vectorized_type_casting_traits<bfloat16, float> {};
|
||||
// Cast traits for float -> bfloat16, derived from the vectorized cast traits.
template <>
|
||||
struct type_casting_traits<float, bfloat16> : vectorized_type_casting_traits<float, bfloat16> {};
|
||||
|
||||
// The double <-> int64_t cast traits below are only declared when
// EIGEN_VECTORIZE_AVX2 is defined.
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
// Cast traits for double -> int64_t.
template <>
|
||||
struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};
|
||||
// Cast traits for int64_t -> double.
template <>
|
||||
struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <>
|
||||
@ -188,6 +195,35 @@ EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet8ui>(const Packet8ui
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4l pcast<Packet4d, Packet4l>(const Packet4d& a) {
|
||||
#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVS512VL)
|
||||
return _mm256_cvttpd_epi64(a);
|
||||
#else
|
||||
EIGEN_ALIGN16 double aux[4];
|
||||
pstore(aux, a);
|
||||
return _mm256_set_epi64x(static_cast<int64_t>(aux[3]), static_cast<int64_t>(aux[2]), static_cast<int64_t>(aux[1]),
|
||||
static_cast<int64_t>(aux[0]));
|
||||
#endif
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4d pcast<Packet4l, Packet4d>(const Packet4l& a) {
|
||||
#if defined(EIGEN_VECTORIZE_AVX512DQ) && defined(EIGEN_VECTORIZE_AVS512VL)
|
||||
return _mm256_cvtepi64_pd(a);
|
||||
#else
|
||||
EIGEN_ALIGN16 int64_t aux[4];
|
||||
pstore(aux, a);
|
||||
return _mm256_set_pd(static_cast<double>(aux[3]), static_cast<double>(aux[2]), static_cast<double>(aux[1]),
|
||||
static_cast<double>(aux[0]));
|
||||
#endif
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4d pcast<Packet2l, Packet4d>(const Packet2l& a, const Packet2l& b) {
|
||||
return _mm256_set_m128d(pcast<Packet2l, Packet2d>(b), pcast<Packet2l, Packet2d>(a));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ul preinterpret<Packet4ul, Packet4l>(const Packet4l& a) {
|
||||
return Packet4ul(a);
|
||||
@ -198,6 +234,21 @@ EIGEN_STRONG_INLINE Packet4l preinterpret<Packet4l, Packet4ul>(const Packet4ul&
|
||||
return Packet4l(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4l preinterpret<Packet4l, Packet4d>(const Packet4d& a) {
|
||||
return _mm256_castpd_si256(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4d preinterpret<Packet4d, Packet4l>(const Packet4l& a) {
|
||||
return _mm256_castsi256_pd(a);
|
||||
}
|
||||
|
||||
// truncation operations
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet4l>(const Packet4l& a) {
|
||||
return _mm256_castsi256_si128(a);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <>
|
||||
|
@ -34,6 +34,7 @@ namespace internal {
|
||||
// Packet aliases over the 512-bit AVX-512 register types.
typedef __m512 Packet16f;
|
||||
typedef __m512i Packet16i;
|
||||
typedef __m512d Packet8d;
|
||||
// No 64-bit integer packet (Packet8l) is declared here yet.
// TODO(rmlarsen): Add support for Packet8l.
|
||||
// Packet16h is declared as a wrapper around __m256i only when
// EIGEN_VECTORIZE_AVX512FP16 is not defined.
#ifndef EIGEN_VECTORIZE_AVX512FP16
|
||||
typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
|
||||
#endif
|
||||
|
@ -37,6 +37,13 @@ template <>
|
||||
struct type_casting_traits<double, int> : vectorized_type_casting_traits<double, int> {};
|
||||
// Cast traits for int -> double, derived from the vectorized cast traits.
template <>
|
||||
struct type_casting_traits<int, double> : vectorized_type_casting_traits<int, double> {};
|
||||
|
||||
// The double <-> int64_t cast traits below are only declared when
// EIGEN_VECTORIZE_AVX2 is NOT defined.
// NOTE(review): presumably this avoids redefining the same specializations
// that are declared under EIGEN_VECTORIZE_AVX2 elsewhere - confirm.
#ifndef EIGEN_VECTORIZE_AVX2
|
||||
// Cast traits for double -> int64_t.
template <>
|
||||
struct type_casting_traits<double, int64_t> : vectorized_type_casting_traits<double, int64_t> {};
|
||||
// Cast traits for int64_t -> double.
template <>
|
||||
struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int64_t, double> {};
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <>
|
||||
@ -79,6 +86,18 @@ EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const
|
||||
(1 << 2) | (1 << 6)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) {
|
||||
return _mm_set_epi64x(_mm_cvtsd_si64(preverse(a)), _mm_cvtsd_si64(a));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet2l, Packet2d>(const Packet2l& a) {
|
||||
EIGEN_ALIGN16 int64_t aux[2];
|
||||
pstore(aux, a);
|
||||
return _mm_set_pd(static_cast<double>(aux[1]), static_cast<double>(aux[0]));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
|
||||
return _mm_cvtepi32_ps(a);
|
||||
@ -126,6 +145,15 @@ EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(const Packet4i& a)
|
||||
return _mm_castsi128_pd(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {
|
||||
return _mm_castsi128_pd(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {
|
||||
return _mm_castpd_si128(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {
|
||||
return _mm_castpd_si128(a);
|
||||
@ -140,6 +168,7 @@ template <>
|
||||
EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
|
||||
return Packet4i(a);
|
||||
}
|
||||
|
||||
// Disable the following code since it's broken on too many platforms / compilers.
|
||||
// #elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
|
||||
#if 0
|
||||
|
@ -266,6 +266,9 @@
|
||||
// Define EIGEN_VECTORIZE_AVX512BF16 when the compiler predefines __AVX512BF16__.
#ifdef __AVX512BF16__
|
||||
#define EIGEN_VECTORIZE_AVX512BF16
|
||||
#endif
|
||||
// Define EIGEN_VECTORIZE_AVX512VL when the compiler predefines __AVX512VL__.
#ifdef __AVX512VL__
|
||||
#define EIGEN_VECTORIZE_AVX512VL
|
||||
#endif
|
||||
#ifdef __AVX512FP16__
|
||||
#ifdef __AVX512VL__
|
||||
#define EIGEN_VECTORIZE_AVX512FP16
|
||||
|
Loading…
x
Reference in New Issue
Block a user