diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index d885d9fbd..94b18584e 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -368,6 +368,11 @@ template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) {
   return a / b;
 }
+// Avoid compiler warning for boolean algebra.
+template <>
+EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) {
+  return a && b;
+}
 
 // In the generic case, memset to all one bits.
 template <typename Packet, typename EnableIf = void>
@@ -1294,29 +1299,61 @@ EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) {
 * The following functions might not have to be overwritten for vectorized types
 ***************************************************************************/
 
+template <typename Packet, typename EnableIf = void>
+struct pmadd_impl {
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
+    return padd(pmul(a, b), c);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
+    return psub(pmul(a, b), c);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
+    return psub(c, pmul(a, b));
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
+    return pnegate(pmadd(a, b, c));
+  }
+};
+
+template <typename Scalar>
+struct pmadd_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value && NumTraits<Scalar>::IsSigned>> {
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return numext::fma(a, b, c);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return numext::fma(a, b, Scalar(-c));
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return numext::fma(Scalar(-a), b, c);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return -Scalar(numext::fma(a, b, c));
+  }
+};
+
 // FMA instructions.
 /** \internal \returns a * b + c (coeff-wise) */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
-  return padd(pmul(a, b), c);
+  return pmadd_impl<Packet>::pmadd(a, b, c);
 }
 
 /** \internal \returns a * b - c (coeff-wise) */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
-  return psub(pmul(a, b), c);
+  return pmadd_impl<Packet>::pmsub(a, b, c);
 }
 
 /** \internal \returns -(a * b) + c (coeff-wise) */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
-  return psub(c, pmul(a, b));
+  return pmadd_impl<Packet>::pnmadd(a, b, c);
 }
 
 /** \internal \returns -((a * b + c) (coeff-wise) */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
-  return pnegate(pmadd(a, b, c));
+  return pmadd_impl<Packet>::pnmsub(a, b, c);
 }
 
 /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 9659c2bfc..d4aa3f889 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -945,6 +945,38 @@ struct nearest_integer_impl {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { return x; }
 };
 
+// Default implementation.
+template <typename Scalar, typename Enable = void>
+struct fma_impl {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return a * b + c;
+  }
+};
+
+// ADL version if it exists.
+template <typename T>
+struct fma_impl<
+    T,
+    std::enable_if_t<std::is_same<T, decltype(fma(std::declval<T>(), std::declval<T>(), std::declval<T>()))>::value>> {
+  static T run(const T& a, const T& b, const T& c) { return fma(a, b, c); }
+};
+
+#if defined(EIGEN_GPUCC)
+template <>
+struct fma_impl<float> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float run(const float& a, const float& b, const float& c) {
+    return ::fmaf(a, b, c);
+  }
+};
+
+template <>
+struct fma_impl<double> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double run(const double& a, const double& b, const double& c) {
+    return ::fma(a, b, c);
+  }
+};
+#endif
+
 }  // end namespace internal
 
 /****************************************************************************
@@ -1852,6 +1884,15 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar arithmetic_shift_right(const Scalar
   return bit_cast<Scalar>(bit_cast<SignedScalar>(a) >> n);
 }
 
+// Use std::fma if available.
+using std::fma;
+
+// Otherwise, rely on template implementation.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar fma(const Scalar& x, const Scalar& y, const Scalar& z) {
+  return internal::fma_impl<Scalar>::run(x, y, z);
+}
+
 }  // end namespace numext
 
 namespace internal {
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index aa93e4516..31a1d8ac8 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -2415,6 +2415,26 @@ EIGEN_STRONG_INLINE Packet8h pmul(const Packet8h& a, const Packet8h& b
   return float2half(rf);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet8h pmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return float2half(pmadd(half2float(a), half2float(b), half2float(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return float2half(pmsub(half2float(a), half2float(b), half2float(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pnmadd(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return float2half(pnmadd(half2float(a), half2float(b), half2float(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8h pnmsub(const Packet8h& a, const Packet8h& b, const Packet8h& c) {
+  return float2half(pnmsub(half2float(a), half2float(b), half2float(c)));
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet8h pdiv<Packet8h>(const Packet8h& a, const Packet8h& b) {
   Packet8f af = half2float(a);
@@ -2785,6 +2805,26 @@ EIGEN_STRONG_INLINE Packet8bf pmul(const Packet8bf& a, const Packet8b
   return F32ToBf16(pmul(Bf16ToF32(a), Bf16ToF32(b)));
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet8bf pmadd(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {
+  return F32ToBf16(pmadd(Bf16ToF32(a), Bf16ToF32(b), Bf16ToF32(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8bf pmsub(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {
+  return F32ToBf16(pmsub(Bf16ToF32(a), Bf16ToF32(b), Bf16ToF32(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8bf pnmadd(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {
+  return F32ToBf16(pnmadd(Bf16ToF32(a), Bf16ToF32(b), Bf16ToF32(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8bf pnmsub(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {
+  return F32ToBf16(pnmsub(Bf16ToF32(a), Bf16ToF32(b), Bf16ToF32(c)));
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet8bf pdiv<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {
   return F32ToBf16(pdiv(Bf16ToF32(a), Bf16ToF32(b)));
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index c07774964..8087a6166 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -2264,6 +2264,7 @@ template <>
 EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet16h& from) {
   // (void*) -> workaround clang warning:
   // cast from 'Eigen::half *' to '__m256i *' increases required alignment from 2 to 32
+  EIGEN_DEBUG_ALIGNED_STORE
   _mm256_store_si256((__m256i*)(void*)to, from);
 }
 
@@ -2271,6 +2272,7 @@ template <>
 EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet16h& from) {
   // (void*) -> workaround clang warning:
   // cast from 'Eigen::half *' to '__m256i *' increases required alignment from 2 to 32
+  EIGEN_DEBUG_UNALIGNED_STORE
   _mm256_storeu_si256((__m256i*)(void*)to, from);
 }
 
@@ -2754,11 +2756,13 @@ EIGEN_STRONG_INLINE Packet16bf ploadu(const bfloat16* from) {
 
 template <>
 EIGEN_STRONG_INLINE void pstore<bfloat16>(bfloat16* to, const Packet16bf& from) {
+  EIGEN_DEBUG_ALIGNED_STORE
   _mm256_store_si256(reinterpret_cast<__m256i*>(to), from);
 }
 
 template <>
 EIGEN_STRONG_INLINE void pstoreu<bfloat16>(bfloat16* to, const Packet16bf& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE
   _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from);
 }
 
@@ -3154,32 +3158,46 @@ EIGEN_STRONG_INLINE Packet8s pset1(const numext::int16_t& x) {
 
 template <>
 EIGEN_STRONG_INLINE void pstore<numext::int16_t>(numext::int16_t* out, const Packet32s& x) {
-  _mm512_storeu_epi16(out, x);
+  EIGEN_DEBUG_ALIGNED_STORE
+  _mm512_store_epi32(out, x);
 }
 
 template <>
 EIGEN_STRONG_INLINE void pstore<numext::int16_t>(numext::int16_t* out, const Packet16s& x) {
-  _mm256_storeu_epi16(out, x);
+  EIGEN_DEBUG_ALIGNED_STORE
+#if defined(EIGEN_VECTORIZE_AVX512F) && defined(EIGEN_VECTORIZE_AVX512VL)
+  _mm256_store_epi32(out, x);
+#else
+  _mm256_store_si256(reinterpret_cast<__m256i*>(out), x);
+#endif
 }
 
 template <>
 EIGEN_STRONG_INLINE void pstore<numext::int16_t>(numext::int16_t* out, const Packet8s& x) {
-  _mm_storeu_epi16(out, x);
+  EIGEN_DEBUG_ALIGNED_STORE
+#if defined(EIGEN_VECTORIZE_AVX512F) && defined(EIGEN_VECTORIZE_AVX512VL)
+  _mm_store_epi32(out, x);
+#else
+  _mm_store_si128(reinterpret_cast<__m128i*>(out), x);
+#endif
 }
 
 template <>
 EIGEN_STRONG_INLINE void pstoreu<numext::int16_t>(numext::int16_t* out, const Packet32s& x) {
-  _mm512_storeu_epi16(out, x);
+  EIGEN_DEBUG_UNALIGNED_STORE
+  _mm512_storeu_epi32(out, x);
 }
 
 template <>
 EIGEN_STRONG_INLINE void pstoreu<numext::int16_t>(numext::int16_t* out, const Packet16s& x) {
-  _mm256_storeu_epi16(out, x);
+  EIGEN_DEBUG_UNALIGNED_STORE
+  _mm256_storeu_epi32(out, x);
 }
 
 template <>
 EIGEN_STRONG_INLINE void pstoreu<numext::int16_t>(numext::int16_t* out, const Packet8s& x) {
-  _mm_storeu_epi16(out, x);
+  EIGEN_DEBUG_UNALIGNED_STORE
+  _mm_storeu_epi32(out, x);
 }
 
 template <>
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 482064e17..35cc273f0 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -424,55 +424,6 @@ struct unpacket_traits {
     masked_store_available = false
   };
 };
-inline std::ostream& operator<<(std::ostream& s, const Packet16c& v) {
-  union {
-    Packet16c v;
-    signed char n[16];
-  } vt;
-  vt.v = v;
-  for (int i = 0; i < 16; i++) s << vt.n[i] << ", ";
-  return s;
-}
-
-inline std::ostream& operator<<(std::ostream& s, const Packet16uc& v) {
-  union {
-    Packet16uc v;
-    unsigned char n[16];
-  } vt;
-  vt.v = v;
-  for (int i = 0; i < 16; i++) s << vt.n[i] << ", ";
-  return s;
-}
-
-inline std::ostream& operator<<(std::ostream& s, const Packet4f& v) {
-  union {
-    Packet4f v;
-    float n[4];
-  } vt;
-  vt.v = v;
-  s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
-  return s;
-}
-
-inline std::ostream& operator<<(std::ostream& s, const Packet4i& v) {
-  union {
-    Packet4i v;
-    int n[4];
-  } vt;
-  vt.v = v;
-  s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
-  return s;
-}
-
-inline std::ostream& operator<<(std::ostream& s, const Packet4ui& v) {
-  union {
-    Packet4ui v;
-    unsigned int n[4];
-  } vt;
-  vt.v = v;
-  s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
-  return s;
-}
 
 template <typename Packet>
 EIGEN_STRONG_INLINE Packet pload_common(const __UNPACK_TYPE__(Packet) * from) {
@@ -2384,6 +2335,44 @@ EIGEN_STRONG_INLINE Packet8bf pmadd(const Packet8bf& a, const Packet8bf& b, cons
   return F32ToBf16(pmadd_even, pmadd_odd);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet8bf pmsub(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {
+  Packet4f a_even = Bf16ToF32Even(a);
+  Packet4f a_odd = Bf16ToF32Odd(a);
+  Packet4f b_even = Bf16ToF32Even(b);
+  Packet4f b_odd = Bf16ToF32Odd(b);
+  Packet4f c_even = Bf16ToF32Even(c);
+  Packet4f c_odd = Bf16ToF32Odd(c);
+  Packet4f pmadd_even = pmsub(a_even, b_even, c_even);
+  Packet4f pmadd_odd = pmsub(a_odd, b_odd, c_odd);
+  return F32ToBf16(pmadd_even, pmadd_odd);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8bf pnmadd(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {
+  Packet4f a_even = Bf16ToF32Even(a);
+  Packet4f a_odd = Bf16ToF32Odd(a);
+  Packet4f b_even = Bf16ToF32Even(b);
+  Packet4f b_odd = Bf16ToF32Odd(b);
+  Packet4f c_even = Bf16ToF32Even(c);
+  Packet4f c_odd = Bf16ToF32Odd(c);
+  Packet4f pmadd_even = pnmadd(a_even, b_even, c_even);
+  Packet4f pmadd_odd = pnmadd(a_odd, b_odd, c_odd);
+  return F32ToBf16(pmadd_even, pmadd_odd);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8bf pnmsub(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {
+  Packet4f a_even = Bf16ToF32Even(a);
+  Packet4f a_odd = Bf16ToF32Odd(a);
+  Packet4f b_even = Bf16ToF32Even(b);
+  Packet4f b_odd = Bf16ToF32Odd(b);
+  Packet4f c_even = Bf16ToF32Even(c);
+  Packet4f c_odd = Bf16ToF32Odd(c);
+  Packet4f pmadd_even = pnmsub(a_even, b_even, c_even);
+  Packet4f pmadd_odd = pnmsub(a_odd, b_odd, c_odd);
+  return F32ToBf16(pmadd_even, pmadd_odd);
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet8bf pmin<Packet8bf>(const Packet8bf& a, const Packet8bf& b) {
   BF16_TO_F32_BINARY_OP_WRAPPER(pmin, a, b);
diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h
index b8d3b4f69..110a07d46 100644
--- a/Eigen/src/Core/arch/Default/BFloat16.h
+++ b/Eigen/src/Core/arch/Default/BFloat16.h
@@ -673,6 +673,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfl
   return bfloat16(::fmaxf(f1, f2));
 }
 
+EIGEN_DEVICE_FUNC inline bfloat16 fma(const bfloat16& a, const bfloat16& b, const bfloat16& c) {
+  // Emulate FMA via float.
+  return bfloat16(static_cast<float>(a) * static_cast<float>(b) + static_cast<float>(c));
+}
+
 #ifndef EIGEN_NO_IO
 EIGEN_ALWAYS_INLINE std::ostream& operator<<(std::ostream& os, const bfloat16& v) {
   os << static_cast<float>(v);
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index d8c9d5a77..af0d17266 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@@ -804,6 +804,18 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half(min)(const half& a, const half& b) {
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half(max)(const half& a, const half& b) {
   return a < b ? b : a;
 }
+EIGEN_DEVICE_FUNC inline half fma(const half& a, const half& b, const half& c) {
+#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+  return half(vfmah_f16(c.x, a.x, b.x));
+#elif defined(EIGEN_VECTORIZE_AVX512FP16)
+  // Reduces to vfmadd213sh.
+  return half(_mm_cvtsh_h(_mm_fmadd_ph(_mm_set_sh(a.x), _mm_set_sh(b.x), _mm_set_sh(c.x))));
+#else
+  // Emulate FMA via float.
+  return half(static_cast<float>(a) * static_cast<float>(b) + static_cast<float>(c));
+#endif
+}
+
 #ifndef EIGEN_NO_IO
 EIGEN_ALWAYS_INLINE std::ostream& operator<<(std::ostream& os, const half& v) {
   os << static_cast<float>(v);
@@ -1023,36 +1035,6 @@ struct cast_impl {
   }
 };
 
-#ifdef EIGEN_VECTORIZE_FMA
-
-template <>
-EIGEN_DEVICE_FUNC inline half pmadd(const half& a, const half& b, const half& c) {
-#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
-  return half(vfmah_f16(a.x, b.x, c.x));
-#elif defined(EIGEN_VECTORIZE_AVX512FP16)
-  // Reduces to vfmadd213sh.
-  return half(_mm_cvtsh_h(_mm_fmadd_ph(_mm_set_sh(a.x), _mm_set_sh(b.x), _mm_set_sh(c.x))));
-#else
-  // Emulate FMA via float.
-  return half(static_cast<float>(a) * static_cast<float>(b) + static_cast<float>(c));
-#endif
-}
-
-template <>
-EIGEN_DEVICE_FUNC inline half pmsub(const half& a, const half& b, const half& c) {
-#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
-  return half(vfmah_f16(a.x, b.x, -c.x));
-#elif defined(EIGEN_VECTORIZE_AVX512FP16)
-  // Reduces to vfmadd213sh.
-  return half(_mm_cvtsh_h(_mm_fmadd_ph(_mm_set_sh(a.x), _mm_set_sh(b.x), -_mm_set_sh(c.x))));
-#else
-  // Emulate FMA via float.
-  return half(static_cast<float>(a) * static_cast<float>(b) - static_cast<float>(c));
-#endif
-}
-
-#endif
-
 }  // namespace internal
 
 }  // namespace Eigen
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 3f2d9d51a..8c6cc3271 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -4964,6 +4964,26 @@ EIGEN_STRONG_INLINE Packet4bf pmul(const Packet4bf& a, const Packet4b
   return F32ToBf16(pmul(Bf16ToF32(a), Bf16ToF32(b)));
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet4bf pmadd(const Packet4bf& a, const Packet4bf& b, const Packet4bf& c) {
+  return F32ToBf16(pmadd(Bf16ToF32(a), Bf16ToF32(b), Bf16ToF32(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4bf pmsub(const Packet4bf& a, const Packet4bf& b, const Packet4bf& c) {
+  return F32ToBf16(pmsub(Bf16ToF32(a), Bf16ToF32(b), Bf16ToF32(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4bf pnmadd(const Packet4bf& a, const Packet4bf& b, const Packet4bf& c) {
+  return F32ToBf16(pnmadd(Bf16ToF32(a), Bf16ToF32(b), Bf16ToF32(c)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4bf pnmsub(const Packet4bf& a, const Packet4bf& b, const Packet4bf& c) {
+  return F32ToBf16(pnmsub(Bf16ToF32(a), Bf16ToF32(b), Bf16ToF32(c)));
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet4bf pdiv<Packet4bf>(const Packet4bf& a, const Packet4bf& b) {
   return F32ToBf16(pdiv(Bf16ToF32(a), Bf16ToF32(b)));
@@ -5634,6 +5654,21 @@ EIGEN_STRONG_INLINE Packet4hf pmadd(const Packet4hf& a, const Packet4hf& b, cons
   return vfma_f16(c, a, b);
 }
 
+template <>
+EIGEN_STRONG_INLINE Packet8hf pmsub(const Packet8hf& a, const Packet8hf& b, const Packet8hf& c) {
+  return vfmaq_f16(pnegate(c), a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4hf pnmadd(const Packet4hf& a, const Packet4hf& b, const Packet4hf& c) {
+  return vfma_f16(c, pnegate(a), b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4hf pnmsub(const Packet4hf& a, const Packet4hf& b, const Packet4hf& c) {
+  return vfma_f16(pnegate(c), pnegate(a), b);
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet8hf pmin<Packet8hf>(const Packet8hf& a, const Packet8hf& b) {
   return vminq_f16(a, b);
diff --git a/scripts/msvc_setup.ps1 b/scripts/msvc_setup.ps1
new file mode 100644
index 000000000..e2d06422e
--- /dev/null
+++ b/scripts/msvc_setup.ps1
@@ -0,0 +1,21 @@
+# Powershell script to set up MSVC environment.
+
+param ($EIGEN_CI_MSVC_ARCH, $EIGEN_CI_MSVC_VER)
+
+Set-PSDebug -Trace 1
+
+function Get-ScriptDirectory { Split-Path $MyInvocation.ScriptName }
+
+# Set defaults if not already set.
+IF (!$EIGEN_CI_MSVC_ARCH) { $EIGEN_CI_MSVC_ARCH = "x64" }
+IF (!$EIGEN_CI_MSVC_VER) { $EIGEN_CI_MSVC_VER = "14.29" }
+
+# Export variables into the global scope
+$global:EIGEN_CI_MSVC_ARCH = $EIGEN_CI_MSVC_ARCH
+$global:EIGEN_CI_MSVC_VER = $EIGEN_CI_MSVC_VER
+
+# Find Visual Studio installation directory.
+$global:VS_INSTALL_DIR = &"${Env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -property installationPath
+# Run VCVarsAll.bat initialization script and extract environment variables.
+# http://allen-mack.blogspot.com/2008/03/replace-visual-studio-command-prompt.html
+cmd.exe /c "`"${VS_INSTALL_DIR}\VC\Auxiliary\Build\vcvarsall.bat`" $EIGEN_CI_MSVC_ARCH -vcvars_ver=$EIGEN_CI_MSVC_VER & set" | foreach { if ($_ -match "=") { $v = $_.split("="); set-item -force -path "ENV:\$($v[0])" -value "$($v[1])" } }
\ No newline at end of file
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 64c55fbfd..1e85f8b31 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -24,21 +24,55 @@ template <typename T>
 inline T REF_MUL(const T& a, const T& b) {
   return a * b;
 }
+
+template <typename Scalar, typename Enable = void>
+struct madd_impl {
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar madd(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return a * b + c;
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar msub(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return a * b - c;
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar nmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return c - a * b;
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar nmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return Scalar(0) - (a * b + c);
+  }
+};
+
+template <typename Scalar>
+struct madd_impl<Scalar, std::enable_if_t<Eigen::internal::is_scalar<Scalar>::value &&
+                                          Eigen::NumTraits<Scalar>::IsSigned>> {
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar madd(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return numext::fma(a, b, c);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar msub(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return numext::fma(a, b, Scalar(-c));
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar nmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return numext::fma(Scalar(-a), b, c);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar nmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return -Scalar(numext::fma(a, b, c));
+  }
+};
+
 template <typename T>
 inline T REF_MADD(const T& a, const T& b, const T& c) {
-  return internal::pmadd(a, b, c);
+  return madd_impl<T>::madd(a, b, c);
 }
 template <typename T>
 inline T REF_MSUB(const T& a, const T& b, const T& c) {
-  return internal::pmsub(a, b, c);
+  return madd_impl<T>::msub(a, b, c);
 }
 template <typename T>
 inline T REF_NMADD(const T& a, const T& b, const T& c) {
-  return internal::pnmadd(a, b, c);
+  return madd_impl<T>::nmadd(a, b, c);
 }
 template <typename T>
 inline T REF_NMSUB(const T& a, const T& b, const T& c) {
-  return internal::pnmsub(a, b, c);
+  return madd_impl<T>::nmsub(a, b, c);
 }
 template <typename T>
 inline T REF_DIV(const T& a, const T& b) {
@@ -70,6 +104,14 @@ template <>
 inline bool REF_MADD(const bool& a, const bool& b, const bool& c) {
   return (a && b) || c;
 }
+template <>
+inline bool REF_DIV(const bool& a, const bool& b) {
+  return a && b;
+}
+template <>
+inline bool REF_RECIPROCAL(const bool& a) {
+  return a;
+}
 
 template <typename T>
 inline T REF_FREXP(const T& x, T& exp) {
@@ -501,8 +543,8 @@ void packetmath() {
   eigen_optimization_barrier_test<Packet>::run();
 
   for (int i = 0; i < size; ++i) {
-    data1[i] = internal::random<Scalar>() / RealScalar(PacketSize);
-    data2[i] = internal::random<Scalar>() / RealScalar(PacketSize);
+    data1[i] = internal::random<Scalar>();
+    data2[i] = internal::random<Scalar>();
     refvalue = (std::max)(refvalue, numext::abs(data1[i]));
   }
 
@@ -522,8 +564,8 @@
 
   for (int M = 0; M < PacketSize; ++M) {
     for (int N = 0; N <= PacketSize; ++N) {
      for (int j = 0; j < size; ++j) {
-        data1[j] = internal::random<Scalar>() / RealScalar(PacketSize);
-        data2[j] = internal::random<Scalar>() / RealScalar(PacketSize);
+        data1[j] = internal::random<Scalar>();
+        data2[j] = internal::random<Scalar>();
         refvalue = (std::max)(refvalue, numext::abs(data1[j]));
       }
@@ -652,11 +694,11 @@ void packetmath() {
   // Avoid overflows.
   if (NumTraits<Scalar>::IsInteger && NumTraits<Scalar>::IsSigned &&
       Eigen::internal::unpacket_traits<Packet>::size > 1) {
-    Scalar limit =
-        static_cast<Scalar>(std::pow(static_cast<double>(numext::real(NumTraits<Scalar>::highest())),
-                                     1.0 / static_cast<double>(Eigen::internal::unpacket_traits<Packet>::size)));
+    Scalar limit = static_cast<Scalar>(
+        static_cast<double>(std::pow(static_cast<double>(numext::real(NumTraits<Scalar>::highest())),
+                                     1.0 / static_cast<double>(Eigen::internal::unpacket_traits<Packet>::size))));
     for (int i = 0; i < PacketSize; ++i) {
-      data1[i] = internal::random<Scalar>(-limit, limit);
+      data1[i] = internal::random<Scalar>(Scalar(0) - limit, limit);
     }
   }
   ref[0] = Scalar(1);
@@ -1683,7 +1725,7 @@ void packetmath_scatter_gather() {
 
   for (Index N = 0; N <= PacketSize; ++N) {
     for (Index i = 0; i < N; ++i) {
-      data1[i] = internal::random<Scalar>() / RealScalar(PacketSize);
+      data1[i] = internal::random<Scalar>();
     }
 
     for (Index i = 0; i < N * 20; ++i) {
@@ -1702,7 +1744,7 @@
     }
 
     for (Index i = 0; i < N * 7; ++i) {
-      buffer[i] = internal::random<Scalar>() / RealScalar(PacketSize);
+      buffer[i] = internal::random<Scalar>();
     }
     packet = internal::pgather_partial<Scalar, Packet>(buffer, 7, N);
     internal::pstore_partial(data1, packet, N);
diff --git a/test/packetmath_test_shared.h b/test/packetmath_test_shared.h
index 93d41496b..7d7a0dacf 100644
--- a/test/packetmath_test_shared.h
+++ b/test/packetmath_test_shared.h
@@ -162,7 +162,9 @@ struct packet_helper {
 
   template <typename T>
   inline Packet load(const T* from, unsigned long long umask) const {
-    return internal::ploadu<Packet>(from, umask);
+    using UMaskType = typename numext::get_integer_by_size<
+        internal::plain_enum_max(internal::unpacket_traits<Packet>::size / CHAR_BIT, 1)>::unsigned_type;
+    return internal::ploadu<Packet>(from, static_cast<UMaskType>(umask));
   }
 
   template <typename T>
@@ -172,7 +174,9 @@ struct packet_helper {
 
   template <typename T>
   inline void store(T* to, const Packet& x, unsigned long long umask) const {
-    internal::pstoreu(to, x, umask);
+    using UMaskType = typename numext::get_integer_by_size<
+        internal::plain_enum_max(internal::unpacket_traits<Packet>::size / CHAR_BIT, 1)>::unsigned_type;
+    internal::pstoreu(to, x, static_cast<UMaskType>(umask));
   }
 
   template <typename T>