cherry-pick !1682 Add nvc++ support into 3.4

This commit is contained in:
Morris Hafner 2024-11-04 17:55:47 +00:00 committed by Rasmus Munk Larsen
parent 9df21dc8b4
commit 3e7bcf54f7
3 changed files with 14 additions and 9 deletions

View File

@@ -373,13 +373,10 @@ template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
//---------- double ---------- //---------- double ----------
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
// See bug 1325, clang fails to call vld1q_u64. inline uint64x2_t p2ul_CONJ_XOR() {
#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML static const uint64_t p2ul_conj_XOR_DATA[] = {0x0, 0x8000000000000000};
static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000}; return vld1q_u64(p2ul_conj_XOR_DATA);
#else }
const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
#endif
struct Packet1cd struct Packet1cd
{ {
@@ -449,7 +446,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)
{ return Packet1cd(pnegate<Packet2d>(a.v)); } { return Packet1cd(pnegate<Packet2d>(a.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
{ return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); } { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR()))); }
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{ {
@@ -464,7 +461,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
// Multiply the imag a with b // Multiply the imag a with b
v2 = vmulq_f64(v2, b.v); v2 = vmulq_f64(v2, b.v);
// Conjugate v2 // Conjugate v2
v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR)); v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR()));
// Swap real/imag elements in v2. // Swap real/imag elements in v2.
v2 = preverse<Packet2d>(v2); v2 = preverse<Packet2d>(v2);
// Add and return the result // Add and return the result

View File

@@ -179,6 +179,13 @@
#define EIGEN_COMP_PGI 0 #define EIGEN_COMP_PGI 0
#endif #endif
/// \internal EIGEN_COMP_NVHPC set to NVHPC version if the compiler is nvc++
#if defined(__NVCOMPILER)
#define EIGEN_COMP_NVHPC (__NVCOMPILER_MAJOR__ * 100 + __NVCOMPILER_MINOR__)
#else
#define EIGEN_COMP_NVHPC 0
#endif
/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler /// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler
#if defined(__CC_ARM) || defined(__ARMCC_VERSION) #if defined(__CC_ARM) || defined(__ARMCC_VERSION)
#define EIGEN_COMP_ARM 1 #define EIGEN_COMP_ARM 1

View File

@@ -81,6 +81,7 @@ macro(ei_add_test_internal testname testname_with_suffix)
# let the user pass flags. # let the user pass flags.
if(${ARGC} GREATER 2) if(${ARGC} GREATER 2)
separate_arguments(compile_options NATIVE_COMMAND ${ARGV2})
ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}") ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}")
endif() endif()