From 218764ee1f0a21e1faf20ed314ffafeae79eb170 Mon Sep 17 00:00:00 2001 From: Srinivas Vasudevan Date: Fri, 2 Dec 2016 14:13:01 -0800 Subject: [PATCH] Added support for expm1 in Eigen. --- Eigen/src/Core/GenericPacketMath.h | 5 ++ Eigen/src/Core/GlobalFunctions.h | 1 + Eigen/src/Core/MathFunctions.h | 69 +++++++++++++++++++ Eigen/src/Core/arch/CUDA/Half.h | 3 + Eigen/src/Core/arch/CUDA/MathFunctions.h | 13 ++++ Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 11 ++- Eigen/src/Core/functors/UnaryFunctors.h | 20 ++++++ Eigen/src/plugins/ArrayCwiseUnaryOps.h | 17 ++++- test/array.cpp | 25 ++++--- test/half_float.cpp | 5 ++ test/packetmath.cpp | 1 + .../Eigen/CXX11/src/Tensor/TensorBase.h | 6 ++ .../Eigen/CXX11/src/Tensor/TensorMeta.h | 1 + .../test/cxx11_tensor_builtins_sycl.cpp | 1 + .../test/cxx11_tensor_of_float16_cuda.cu | 6 ++ 15 files changed, 171 insertions(+), 13 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 27033a2dd..ac5552d3e 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -61,6 +61,7 @@ struct default_packet_traits HasSqrt = 0, HasRsqrt = 0, HasExp = 0, + HasExpm1 = 0, HasLog = 0, HasLog1p = 0, HasLog10 = 0, @@ -401,6 +402,10 @@ Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { using std::exp; return exp(a); } +/** \internal \returns the expm1 of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pexpm1(const Packet& a) { return numext::expm1(a); } + /** \internal \returns the log of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 769dc255c..12828a7c3 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -71,6 +71,7 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 1ac0b2473..af02d0247 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -13,6 +13,7 @@ // source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html // TODO this should better be moved to NumTraits #define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L +#define EIGEN_LN2 0.69314718055994530941723212145817656807550013436024425412068001L namespace Eigen { @@ -482,6 +483,54 @@ struct arg_retval typedef typename NumTraits::Real type; }; +/**************************************************************************** +* Implementation of expm1 * +****************************************************************************/ + +// This implementation is based on GSL Math's expm1. +namespace std_fallback { + // fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar, + // or that there is no suitable std::expm1 function available. Implementation + // attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php. + template + EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + typedef typename NumTraits::Real RealScalar; + + EIGEN_USING_STD_MATH(exp); + Scalar u = exp(x); + if (u == RealScalar(1)) { + return x; + } + if (u - RealScalar(1) == RealScalar(-1)) { + return RealScalar(-1); + } + + EIGEN_USING_STD_MATH(log); + return (u - RealScalar(1)) * x / log(u); + } +} + +template +struct expm1_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + #if EIGEN_HAS_CXX11_MATH + using std::expm1; + #endif + using std_fallback::expm1; + return expm1(x); + } +}; + + +template +struct expm1_retval +{ + typedef Scalar type; +}; + /**************************************************************************** * Implementation of log1p * ****************************************************************************/ @@ -1232,6 +1281,26 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp(const double &x) { return ::exp(x); } #endif +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float expm1(float x) { return cl::sycl::expm1(x); } +EIGEN_ALWAYS_INLINE double expm1(double x) { return cl::sycl::expm1(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float expm1(const float &x) { return ::expm1f(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double expm1(const double &x) { return ::expm1(x); } +#endif + template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cos(const T &x) { diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 5a400307b..db9878796 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -392,6 +392,9 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { return half(::expf(float(a))); #endif } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { + return half(numext::expm1(float(a))); +} EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return half(::hlog(a)); diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 0348b41db..3548f2fa2 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -56,6 +56,19 @@ double2 pexp(const double2& a) return make_double2(exp(a.x), exp(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pexp(const float4& a) +{ + return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pexp(const double2& a) +{ + using ::expm1; + return make_double2(expm1(a.x), expm1(a.y)); +} + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psqrt(const float4& a) { diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index ae54225f8..35cb0efd5 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -34,6 +34,7 @@ template<> struct packet_traits : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasExp = 1, + HasExpm1 = 1, HasLog = 1, HasLog1p = 1 }; @@ -267,7 +268,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul(const ha #endif } -template<> __device__ EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1(const half2& a) { float a1 = __low2float(a); float a2 = __high2float(a); float r1 = log1pf(a1); @@ -275,6 +276,14 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { return __floats2half2_rn(r1, r2); } +template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1(const half2& a) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float r1 = expm1f(a1); + float r2 = expm1f(a2); + return __floats2half2_rn(r1, r2); +} + #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 template<> __device__ EIGEN_STRONG_INLINE diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 9d4d3eece..bfc046556 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -262,6 +262,26 @@ struct functor_traits > { }; }; +/** \internal + * + * \brief Template functor to compute the exponential of a scalar - 1. + * + * \sa class CwiseUnaryOp, ArrayBase::expm1() + */ +template struct scalar_expm1_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_expm1_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::expm1(a); } + template + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexpm1(a); } +}; +template +struct functor_traits > { + enum { + PacketAccess = packet_traits::HasExpm1, + Cost = functor_traits >::Cost // TODO measure cost of expm1 + }; +}; + /** \internal * * \brief Template functor to compute the logarithm of a scalar diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index ebaa3f192..43615bd56 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -10,6 +10,7 @@ typedef CwiseUnaryOp, const Derived> Inverse typedef CwiseUnaryOp, const Derived> BooleanNotReturnType; typedef CwiseUnaryOp, const Derived> ExpReturnType; +typedef CwiseUnaryOp, const Derived> Expm1ReturnType; typedef CwiseUnaryOp, const Derived> LogReturnType; typedef CwiseUnaryOp, const Derived> Log1pReturnType; typedef CwiseUnaryOp, const Derived> Log10ReturnType; @@ -90,6 +91,20 @@ exp() const return ExpReturnType(derived()); } +/** \returns an expression of the coefficient-wise exponential of *this minus 1. + * + * In exact arithmetic, \c x.expm1() is equivalent to \c x.exp() - 1, + * however, with finite precision, this function is much more accurate when \c x is close to zero. + * + * \sa Math functions, exp() + */ +EIGEN_DEVICE_FUNC +inline const Expm1ReturnType +expm1() const +{ + return Expm1ReturnType(derived()); +} + /** \returns an expression of the coefficient-wise logarithm of *this. * * This function computes the coefficient-wise logarithm. The function MatrixBase::log() in the @@ -98,7 +113,7 @@ exp() const * Example: \include Cwise_log.cpp * Output: \verbinclude Cwise_log.out * - * \sa Math functions, exp() + * \sa Math functions, log() */ EIGEN_DEVICE_FUNC inline const LogReturnType diff --git a/test/array.cpp b/test/array.cpp index 15c3266a9..f7f3ba780 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -18,7 +18,7 @@ template void array(const ArrayType& m) typedef Array RowVectorType; Index rows = m.rows(); - Index cols = m.cols(); + Index cols = m.cols(); ArrayType m1 = ArrayType::Random(rows, cols), m2 = ArrayType::Random(rows, cols), @@ -44,25 +44,25 @@ template void array(const ArrayType& m) VERIFY_IS_APPROX(m3, m1 + s2); m3 = m1; m3 -= s1; - VERIFY_IS_APPROX(m3, m1 - s1); - + VERIFY_IS_APPROX(m3, m1 - s1); + // scalar operators via Maps m3 = m1; ArrayType::Map(m1.data(), m1.rows(), m1.cols()) -= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); VERIFY_IS_APPROX(m1, m3 - m2); - + m3 = m1; ArrayType::Map(m1.data(), m1.rows(), m1.cols()) += ArrayType::Map(m2.data(), m2.rows(), m2.cols()); VERIFY_IS_APPROX(m1, m3 + m2); - + m3 = m1; ArrayType::Map(m1.data(), m1.rows(), m1.cols()) *= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); VERIFY_IS_APPROX(m1, m3 * m2); - + m3 = m1; m2 = ArrayType::Random(rows,cols); m2 = (m2==0).select(1,m2); - ArrayType::Map(m1.data(), m1.rows(), m1.cols()) /= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); + ArrayType::Map(m1.data(), m1.rows(), m1.cols()) /= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); VERIFY_IS_APPROX(m1, m3 / m2); // reductions @@ -84,7 +84,7 @@ template void array(const ArrayType& m) VERIFY_IS_APPROX(m3.rowwise() += rv1, m1.rowwise() + rv1); m3 = m1; VERIFY_IS_APPROX(m3.rowwise() -= rv1, m1.rowwise() - rv1); - + // Conversion from scalar VERIFY_IS_APPROX((m3 = s1), ArrayType::Constant(rows,cols,s1)); VERIFY_IS_APPROX((m3 = 1), ArrayType::Constant(rows,cols,1)); @@ -102,7 +102,7 @@ template void array(const ArrayType& m) f1.setRandom(); FixedArrayType f4(f1.data()); VERIFY_IS_APPROX(f4, f1); - + // pow VERIFY_IS_APPROX(m1.pow(2), m1.square()); VERIFY_IS_APPROX(pow(m1,2), m1.square()); @@ -144,7 +144,7 @@ template void comparisons(const ArrayType& m) m2 = ArrayType::Random(rows, cols), m3(rows, cols), m4 = m1; - + m4 = (m4.abs()==Scalar(0)).select(1,m4); VERIFY(((m1 + Scalar(1)) > m1).all()); @@ -295,6 +295,9 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.exp(), exp(m1)); VERIFY_IS_APPROX(m1.exp() / m2.exp(),(m1-m2).exp()); + VERIFY_IS_APPROX(m1.expm1(), expm1(m1)); + VERIFY_IS_APPROX((m3 + smallNumber).exp() - 1, expm1(abs(m3) + smallNumber)); + VERIFY_IS_APPROX(m3.pow(RealScalar(0.5)), m3.sqrt()); VERIFY_IS_APPROX(pow(m3,RealScalar(0.5)), m3.sqrt()); @@ -329,7 +332,7 @@ template void array_complex(const ArrayType& m) ArrayType m1 = ArrayType::Random(rows, cols), m2(rows, cols), m4 = m1; - + m4.real() = (m4.real().abs()==RealScalar(0)).select(RealScalar(1),m4.real()); m4.imag() = (m4.imag().abs()==RealScalar(0)).select(RealScalar(1),m4.imag()); diff --git a/test/half_float.cpp b/test/half_float.cpp index f8d438e2f..6f3196852 100644 --- a/test/half_float.cpp +++ b/test/half_float.cpp @@ -185,6 +185,11 @@ void test_basic_functions() VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI)); VERIFY_IS_APPROX(float(exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI)); + VERIFY_IS_EQUAL(float(numext::expm1(half(0.0f))), 0.0f); + VERIFY_IS_EQUAL(float(expm1(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::expm1(half(2.0f))), 6.3890561f); + VERIFY_IS_APPROX(float(expm1(half(2.0f))), 6.3890561f); + VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f); VERIFY_IS_EQUAL(float(log(half(1.0f))), 0.0f); VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f); diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 7821a1738..08b360340 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -438,6 +438,7 @@ template void packetmath_real() CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); #if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L) + CHECK_CWISE1_IF(PacketTraits::HasExpm1, std::expm1, internal::pexpm1); CHECK_CWISE1_IF(PacketTraits::HasLog1p, std::log1p, internal::plog1p); CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 7a45a5cf4..fbe340820 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -185,6 +185,12 @@ class TensorBase return unaryExpr(internal::scalar_exp_op()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + expm1() const { + return unaryExpr(internal::scalar_expm1_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> log() const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 25ce471f9..b5ef31d55 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -75,6 +75,7 @@ struct PacketType { HasSqrt = 1, HasRsqrt = 1, HasExp = 1, + HasExpm1 = 0, HasLog = 1, HasLog1p = 0, HasLog10 = 0, diff --git a/unsupported/test/cxx11_tensor_builtins_sycl.cpp b/unsupported/test/cxx11_tensor_builtins_sycl.cpp index d8c2898ca..e230b626f 100644 --- a/unsupported/test/cxx11_tensor_builtins_sycl.cpp +++ b/unsupported/test/cxx11_tensor_builtins_sycl.cpp @@ -91,6 +91,7 @@ template T inverse(T x) { return 1 / x; } TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR, OPERATOR) \ TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR, OPERATOR) \ TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR, OPERATOR) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(expm1, SCALAR, OPERATOR) \ TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR, OPERATOR) \ TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR) \ TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR, OPERATOR) \ diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 2f86980a2..908a5e5a9 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -200,6 +200,8 @@ void test_cuda_trancendental() { Eigen::TensorMap, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem); Eigen::TensorMap, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem); Eigen::TensorMap, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_res4_half(d_res3_half, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_res4_float(d_res3_float, num_elem); gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f); gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f); @@ -207,6 +209,7 @@ void test_cuda_trancendental() { gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast(); gpu_res2_float.device(gpu_device) = gpu_float2.log().cast(); gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast(); + gpu_res4_float.device(gpu_device) = gpu_float3.expm1().cast(); gpu_res1_half.device(gpu_device) = gpu_float1.cast(); gpu_res1_half.device(gpu_device) = gpu_res1_half.exp(); @@ -217,6 +220,9 @@ void test_cuda_trancendental() { gpu_res3_half.device(gpu_device) = gpu_float3.cast(); gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p(); + gpu_res3_half.device(gpu_device) = gpu_float3.cast(); + gpu_res3_half.device(gpu_device) = gpu_res3_half.expm1(); + Tensor input1(num_elem); Tensor half_prec1(num_elem); Tensor full_prec1(num_elem);