mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
More fixes for 32-bit.
This commit is contained in:
parent
de304ab960
commit
c8d368bdaf
@ -402,21 +402,6 @@ struct unpacket_traits<Packet8bf> {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
// Work around lack of extract/cvt for epi64 when compiling for 32-bit.
|
|
||||||
#if EIGEN_ARCH_x86_64
|
|
||||||
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) { return _mm_cvtsi128_si64(a); }
|
|
||||||
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) { return _mm_extract_epi64(a, 1); }
|
|
||||||
#else
|
|
||||||
// epi64 instructions are not available. The following seems to generate the same instructions
|
|
||||||
// with -O2 in GCC/Clang.
|
|
||||||
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) {
|
|
||||||
return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_castsi128_pd(a)));
|
|
||||||
}
|
|
||||||
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) {
|
|
||||||
return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1)));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Helper function for bit packing snippet of low precision comparison.
|
// Helper function for bit packing snippet of low precision comparison.
|
||||||
// It packs the flags from 16x16 to 8x16.
|
// It packs the flags from 16x16 to 8x16.
|
||||||
EIGEN_STRONG_INLINE __m128i Pack16To8(Packet8f rf) {
|
EIGEN_STRONG_INLINE __m128i Pack16To8(Packet8f rf) {
|
||||||
|
@ -145,6 +145,27 @@ EIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d& a, const Packet2d& b
|
|||||||
|
|
||||||
// Declares a const Packet4ui named p4ui_<NAME>, initialized with
// pset1<Packet4ui>(X).
#define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = pset1<Packet4ui>(X)
|
#define EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = pset1<Packet4ui>(X)
|
||||||
|
|
||||||
|
// Work around lack of extract/cvt for epi64 when compiling for 32-bit.
|
||||||
|
#if EIGEN_ARCH_x86_64
|
||||||
|
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) { return _mm_cvtsi128_si64(a); }
|
||||||
|
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||||
|
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) { return _mm_extract_epi64(a, 1); }
|
||||||
|
#else
|
||||||
|
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) {
|
||||||
|
return _mm_cvtsi128_si64(_mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1)));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
// epi64 instructions are not available. The following seems to generate the same instructions
|
||||||
|
// with -O2 in GCC/Clang.
|
||||||
|
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_0(const __m128i& a) {
|
||||||
|
return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_castsi128_pd(a)));
|
||||||
|
}
|
||||||
|
EIGEN_ALWAYS_INLINE int64_t _mm_extract_epi64_1(const __m128i& a) {
|
||||||
|
return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_shuffle_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(a), 0x1)));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||||
// to leverage AVX instructions.
|
// to leverage AVX instructions.
|
||||||
#ifndef EIGEN_VECTORIZE_AVX
|
#ifndef EIGEN_VECTORIZE_AVX
|
||||||
@ -1610,11 +1631,7 @@ EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
|
|||||||
}
|
}
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
|
EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
|
||||||
#if EIGEN_ARCH_x86_64
|
int64_t x = _mm_extract_epi64_0(a);
|
||||||
int64_t x = _mm_cvtsi128_si64(a);
|
|
||||||
#else
|
|
||||||
int64_t x = numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_castsi128_pd(a)));
|
|
||||||
#endif
|
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
template <>
|
template <>
|
||||||
@ -1641,11 +1658,7 @@ EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
|
|||||||
}
|
}
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
|
EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
|
||||||
#if EIGEN_ARCH_x86_64
|
int64_t x = _mm_extract_epi64_0(a);
|
||||||
int64_t x = _mm_cvtsi128_si64(a);
|
|
||||||
#else
|
|
||||||
int64_t x = numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_castsi128_pd(a)));
|
|
||||||
#endif
|
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
template <>
|
template <>
|
||||||
@ -1669,11 +1682,7 @@ EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
|
|||||||
}
|
}
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
|
EIGEN_STRONG_INLINE int64_t pfirst<Packet2l>(const Packet2l& a) {
|
||||||
#if EIGEN_ARCH_x86_64
|
return _mm_extract_epi64_0(a);
|
||||||
return _mm_cvtsi128_si64(a);
|
|
||||||
#else
|
|
||||||
return numext::bit_cast<int64_t>(_mm_cvtsd_f64(_mm_castsi128_pd(a)));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
|
EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
|
||||||
|
@ -88,7 +88,11 @@ EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) {
|
EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) {
|
||||||
|
#if EIGEN_ARCH_x86_64
|
||||||
return _mm_set_epi64x(_mm_cvtsd_si64(preverse(a)), _mm_cvtsd_si64(a));
|
return _mm_set_epi64x(_mm_cvtsd_si64(preverse(a)), _mm_cvtsd_si64(a));
|
||||||
|
#else
|
||||||
|
return _mm_set_epi64x(static_cast<int64_t>(pfirst(preverse(a))), static_cast<int64_t>(pfirst(a)));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
|
@ -90,3 +90,21 @@ build:windows:x86-64:cuda-11.4:msvc-14.29:
|
|||||||
variables:
|
variables:
|
||||||
EIGEN_CI_MSVC_VER: "14.29"
|
EIGEN_CI_MSVC_VER: "14.29"
|
||||||
EIGEN_CI_BEFORE_SCRIPT: $$env:CUDA_PATH=$$env:CUDA_PATH_V11_4
|
EIGEN_CI_BEFORE_SCRIPT: $$env:CUDA_PATH=$$env:CUDA_PATH_V11_4
|
||||||
|
|
||||||
|
######## MR Smoke Tests ########################################################
|
||||||
|
|
||||||
|
# MSVC 14.29 64-bit (VS 2019)
# Smoke-test variant of the avx512dq build: it overrides the build target and
# only runs for merge-request pipelines.
build:windows:x86-64:msvc-14.29:avx512dq:smoketest:
  extends: build:windows:x86-64:msvc-14.29:avx512dq
  variables:
    EIGEN_CI_BUILD_TARGET: buildsmoketests
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
|
||||||
|
|
||||||
|
# MSVC 14.29 32-bit (VS 2019)
# Same smoke-test job as the 64-bit variant it extends, with the MSVC target
# architecture switched to x86.
build:windows:x86:msvc-14.29:avx512dq:smoketest:
  extends: build:windows:x86-64:msvc-14.29:avx512dq:smoketest
  variables:
    EIGEN_CI_MSVC_ARCH: "x86"
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user