From 9000b3767770f6dd0f4cfb12f4e19c71921885a4 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Tue, 30 Apr 2024 22:18:25 +0000 Subject: [PATCH] Fix new generic nearest integer ops on GPU. --- .../arch/Default/GenericPacketMathFunctions.h | 10 +++---- .../Default/GenericPacketMathFunctionsFwd.h | 10 +++---- Eigen/src/Core/arch/GPU/PacketMath.h | 30 +++++++++++++++++-- Eigen/src/Core/util/Macros.h | 4 +++ 4 files changed, 41 insertions(+), 13 deletions(-) diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 537dffe9a..1c46ba48d 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -2470,7 +2470,7 @@ struct unary_pow_impl { }; template -EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a) { using Scalar = typename unpacket_traits::type; using IntType = typename numext::get_integer_by_size::signed_type; // Adds and subtracts signum(a) * 2^kMantissaBits to force rounding. @@ -2490,7 +2490,7 @@ EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a) { } template -EIGEN_STRONG_INLINE Packet generic_floor(const Packet& a) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_floor(const Packet& a) { using Scalar = typename unpacket_traits::type; const Packet cst_1 = pset1(Scalar(1)); Packet rint_a = generic_rint(a); @@ -2502,7 +2502,7 @@ EIGEN_STRONG_INLINE Packet generic_floor(const Packet& a) { } template -EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a) { using Scalar = typename unpacket_traits::type; const Packet cst_1 = pset1(Scalar(1)); Packet rint_a = generic_rint(a); @@ -2514,7 +2514,7 @@ EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a) { } template -EIGEN_STRONG_INLINE Packet generic_trunc(const Packet& a) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_trunc(const Packet& a) { Packet abs_a = pabs(a); Packet sign_a = pandnot(a, abs_a); Packet floor_abs_a = generic_floor(abs_a); @@ -2523,7 +2523,7 @@ EIGEN_STRONG_INLINE Packet generic_trunc(const Packet& a) { } template -EIGEN_STRONG_INLINE Packet generic_round(const Packet& a) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_round(const Packet& a) { using Scalar = typename unpacket_traits::type; const Packet cst_half = pset1(Scalar(0.5)); const Packet cst_1 = pset1(Scalar(1)); diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h index 41dc068fc..1bf112816 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -134,19 +134,19 @@ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Packet& x); template -EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a); template -EIGEN_STRONG_INLINE Packet generic_floor(const Packet& a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_floor(const Packet& a); template -EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a); template -EIGEN_STRONG_INLINE Packet generic_trunc(const Packet& a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_trunc(const Packet& a); template -EIGEN_STRONG_INLINE Packet generic_round(const Packet& a); +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_round(const Packet& a); // Macros for instantiating these generic functions for different backends. #define EIGEN_PACKET_FUNCTION(METHOD, SCALAR, PACKET) \ diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h index 352c8f5bf..6d4230aa7 100644 --- a/Eigen/src/Core/arch/GPU/PacketMath.h +++ b/Eigen/src/Core/arch/GPU/PacketMath.h @@ -74,7 +74,6 @@ struct packet_traits : default_packet_traits { HasGammaSampleDerAlpha = 1, HasIGammac = 1, HasBetaInc = 1, - HasBlend = 0 }; }; @@ -106,9 +105,7 @@ struct packet_traits : default_packet_traits { HasGammaSampleDerAlpha = 1, HasIGammac = 1, HasBetaInc = 1, - HasBlend = 0, - HasFloor = 1, }; }; @@ -518,6 +515,33 @@ EIGEN_DEVICE_FUNC inline double2 pfloor(const double2& a) { return make_double2(floor(a.x), floor(a.y)); } +template <> +EIGEN_DEVICE_FUNC inline float4 pceil(const float4& a) { + return make_float4(ceilf(a.x), ceilf(a.y), ceilf(a.z), ceilf(a.w)); +} +template <> +EIGEN_DEVICE_FUNC inline double2 pceil(const double2& a) { + return make_double2(ceil(a.x), ceil(a.y)); +} + +template <> +EIGEN_DEVICE_FUNC inline float4 print(const float4& a) { + return make_float4(rintf(a.x), rintf(a.y), rintf(a.z), rintf(a.w)); +} +template <> +EIGEN_DEVICE_FUNC inline double2 print(const double2& a) { + return make_double2(rint(a.x), rint(a.y)); +} + +template <> +EIGEN_DEVICE_FUNC inline float4 ptrunc(const float4& a) { + return make_float4(truncf(a.x), truncf(a.y), truncf(a.z), truncf(a.w)); +} +template <> +EIGEN_DEVICE_FUNC inline double2 ptrunc(const double2& a) { + return make_double2(trunc(a.x), trunc(a.y)); +} + EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { float tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 57092570b..4d10eecd0 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -964,6 +964,10 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void ignore_unused_variable(cons // added then subtracted, which is otherwise compiled away with -ffast-math. // // See bug 1674 +#if defined(EIGEN_GPU_COMPILE_PHASE) +#define EIGEN_OPTIMIZATION_BARRIER(X) +#endif + #if !defined(EIGEN_OPTIMIZATION_BARRIER) #if EIGEN_COMP_GNUC // According to https://gcc.gnu.org/onlinedocs/gcc/Constraints.html: