diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 8ff226de8..3b94af528 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -402,21 +402,6 @@ struct unpacket_traits {
   };
 };
 
-// Work around lack of extract/cvt for epi64 when compiling for 32-bit.
-#if EIGEN_ARCH_x86_64
-EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) { return _mm_cvtsi128_si64(a); }
-EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) { return _mm_extract_epi64(a, 1); }
-#else
-// epi64 instructions are not available. The following seems to generate the same instructions
-// with -O2 in GCC/Clang.
-EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) {
-  return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_castsi128_pd(a)));
-}
-EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) {
-  return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1)));
-}
-#endif
-
 // Helper function for bit packing snippet of low precision comparison.
 // It packs the flags from 16x16 to 8x16.
 EIGEN_STRONG_INLINE __m128i Pack16To8(Packet8f rf) {
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 97c8ac874..008109adf 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -145,6 +145,27 @@ EIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d& a, const Packet2d& b
 
 #define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = pset1<Packet4ui>(X)
 
+// Work around lack of extract/cvt for epi64 when compiling for 32-bit or without SSE4.1.
+#if EIGEN_ARCH_x86_64
+EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) { return _mm_cvtsi128_si64(a); }
+#ifdef EIGEN_VECTORIZE_SSE4_1
+EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) { return _mm_extract_epi64(a, 1); }
+#else  // No SSE4.1: swap the two 64-bit lanes with a pd shuffle, then read the low lane.
+EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) {
+  return _mm_cvtsi128_si64(_mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1)));
+}
+#endif  // EIGEN_VECTORIZE_SSE4_1
+#else   // !EIGEN_ARCH_x86_64
+// 32-bit targets have no epi64 extract/cvt intrinsics, so each lane is read as a double and bit_cast back.
+// This seems to generate the same instructions with -O2 in GCC/Clang.
+EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) {
+  return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_castsi128_pd(a)));
+}
+EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) {
+  return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1)));
+}
+#endif  // EIGEN_ARCH_x86_64
+
 // Use the packet_traits defined in AVX/PacketMath.h instead if we're going
 // to leverage AVX instructions.
#ifndef EIGEN_VECTORIZE_AVX @@ -1610,11 +1631,7 @@ EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { } template <> EIGEN_STRONG_INLINE int64_t pfirst(const Packet2l& a) { -#if EIGEN_ARCH_x86_64 - int64_t x = _mm_cvtsi128_si64(a); -#else - int64_t x = numext::bit_cast(_mm_cvtsd_f64(_mm_castsi128_pd(a))); -#endif + int64_t x = _mm_extract_epi64_0(a); return x; } template <> @@ -1641,11 +1658,7 @@ EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { } template <> EIGEN_STRONG_INLINE int64_t pfirst(const Packet2l& a) { -#if EIGEN_ARCH_x86_64 - int64_t x = _mm_cvtsi128_si64(a); -#else - int64_t x = numext::bit_cast(_mm_cvtsd_f64(_mm_castsi128_pd(a))); -#endif + int64_t x = _mm_extract_epi64_0(a); return x; } template <> @@ -1669,11 +1682,7 @@ EIGEN_STRONG_INLINE double pfirst(const Packet2d& a) { } template <> EIGEN_STRONG_INLINE int64_t pfirst(const Packet2l& a) { -#if EIGEN_ARCH_x86_64 - return _mm_cvtsi128_si64(a); -#else - return numext::bit_cast(_mm_cvtsd_f64(_mm_castsi128_pd(a))); -#endif + return _mm_extract_epi64_0(a); } template <> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h index 42871c91b..02a670cce 100644 --- a/Eigen/src/Core/arch/SSE/TypeCasting.h +++ b/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -88,7 +88,11 @@ EIGEN_STRONG_INLINE Packet4i pcast(const Packet2d& a, const template <> EIGEN_STRONG_INLINE Packet2l pcast(const Packet2d& a) { +#if EIGEN_ARCH_x86_64 return _mm_set_epi64x(_mm_cvtsd_si64(preverse(a)), _mm_cvtsd_si64(a)); +#else + return _mm_set_epi64x(static_cast(pfirst(preverse(a))), static_cast(pfirst(a))); +#endif } template <> diff --git a/ci/build.windows.gitlab-ci.yml b/ci/build.windows.gitlab-ci.yml index 3efe05070..9bcf83aba 100644 --- a/ci/build.windows.gitlab-ci.yml +++ b/ci/build.windows.gitlab-ci.yml @@ -90,3 +90,21 @@ build:windows:x86-64:cuda-11.4:msvc-14.29: variables: EIGEN_CI_MSVC_VER: "14.29" EIGEN_CI_BEFORE_SCRIPT: 
$$env:CUDA_PATH=$$env:CUDA_PATH_V11_4
+
+######## MR Smoke Tests ########################################################
+
+# Smoke-test build, MSVC 14.29 (VS 2019), 64-bit; only for merge-request pipelines.
+build:windows:x86-64:msvc-14.29:avx512dq:smoketest:
+  extends: build:windows:x86-64:msvc-14.29:avx512dq
+  rules:
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+  variables:
+    EIGEN_CI_BUILD_TARGET: buildsmoketests
+
+# Smoke-test build, MSVC 14.29 (VS 2019), 32-bit; extends the 64-bit smoke test with the arch set to x86.
+build:windows:x86:msvc-14.29:avx512dq:smoketest:
+  extends: build:windows:x86-64:msvc-14.29:avx512dq:smoketest
+  rules:
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+  variables:
+    EIGEN_CI_MSVC_ARCH: "x86"