diff --git a/Eigen/Core b/Eigen/Core index 1ec749452..63602f4c3 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -300,6 +300,7 @@ using std::ptrdiff_t; #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" +#include "src/Core/SpecialFunctions.h" #include "src/Core/GenericPacketMath.h" #if defined EIGEN_VECTORIZE_AVX diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 5f27d8166..8ad51bad5 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -43,7 +43,7 @@ struct default_packet_traits { enum { HasHalfPacket = 0, - + HasAdd = 1, HasSub = 1, HasMul = 1, @@ -74,6 +74,9 @@ struct default_packet_traits HasSinh = 0, HasCosh = 0, HasTanh = 0, + HasLGamma = 0, + HasErf = 0, + HasErfc = 0, HasRound = 0, HasFloor = 0, @@ -432,6 +435,18 @@ Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } +/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } + +/** \internal \returns the erf(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perf(const Packet& a) { using numext::erf; return erf(a); } + +/** \internal \returns the erfc(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 585974809..62fec7008 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -49,6 +49,9 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op) diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index ae28d4db6..75a80daaa 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -155,6 +155,10 @@ template class MapBase checkSanity(); } + #ifdef EIGEN_MAPBASE_PLUGIN + #include EIGEN_MAPBASE_PLUGIN + #endif + protected: EIGEN_DEVICE_FUNC diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h new file mode 100644 index 000000000..05973e372 --- /dev/null +++ b/Eigen/src/Core/SpecialFunctions.h @@ -0,0 +1,160 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPECIAL_FUNCTIONS_H +#define EIGEN_SPECIAL_FUNCTIONS_H + +namespace Eigen { +namespace internal { + +/**************************************************************************** + * Implementation of lgamma * + ****************************************************************************/ + +template +struct lgamma_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template +struct lgamma_retval +{ + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template<> +struct lgamma_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const float& x) { return ::lgammaf(x); } +}; + +template<> +struct lgamma_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); } +}; +#endif + +/**************************************************************************** + * Implementation of erf * + ****************************************************************************/ + +template +struct erf_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template +struct erf_retval +{ + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template<> +struct erf_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(const float& x) { return ::erff(x); } +}; + +template<> +struct erf_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); } +}; +#endif // EIGEN_HAS_C99_MATH + +/*************************************************************************** +* Implementation of erfc * +****************************************************************************/ + +template +struct erfc_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +template +struct erfc_retval +{ + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template<> +struct erfc_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); } +}; + +template<> +struct erfc_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); } +}; +#endif // EIGEN_HAS_C99_MATH + +} // end namespace internal + + +namespace numext { + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) lgamma(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); +} + +} // end namespace numext + +} // end namespace Eigen + +#endif // EIGEN_SPECIAL_FUNCTIONS_H diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 3bea88bea..ecd5c444e 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -66,6 +66,43 @@ double2 prsqrt(const double2& a) return make_double2(rsqrt(a.x), rsqrt(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 plgamma(const float4& a) +{ + return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 plgamma(const double2& a) +{ + return make_double2(lgamma(a.x), lgamma(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perf(const float4& a) +{ + return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perf(const double2& a) +{ + return make_double2(erf(a.x), erf(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perfc(const float4& a) +{ + return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perfc(const double2& a) +{ + return make_double2(erfc(a.x), erfc(a.y)); +} + + #endif } // end namespace internal diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 0d2c2fef0..cb1b547e0 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -39,6 +39,9 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasLGamma = 1, + HasErf = 1, + HasErfc = 1, HasBlend = 0, }; @@ -59,6 +62,9 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasLGamma = 1, + HasErf = 1, + HasErfc = 1, HasBlend = 0, }; diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index e630acc38..6891cfdda 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -403,6 +403,77 @@ struct functor_traits > }; }; + +/** \internal + * \brief Template functor to compute the natural log of the absolute + * value of Gamma of a scalar + * \sa class CwiseUnaryOp, Cwise::lgamma() + */ +template struct scalar_lgamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::lgamma; return lgamma(a); + } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasLGamma + }; +}; + +/** \internal + * \brief Template functor to compute the Gauss error function of a + * scalar + * \sa class CwiseUnaryOp, Cwise::erf() + */ +template struct scalar_erf_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::erf; return erf(a); + } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::perf(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasErf + }; +}; + +/** \internal + * \brief Template functor to compute the Complementary Error Function + * of a scalar + * \sa class CwiseUnaryOp, Cwise::erfc() + */ +template struct scalar_erfc_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::erfc; return erfc(a); + } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::perfc(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasErfc + }; +}; + + /** \internal * \brief Template functor to compute the atan of a scalar * \sa class CwiseUnaryOp, ArrayBase::atan() @@ -422,6 +493,7 @@ struct functor_traits > }; }; + /** \internal * \brief Template functor to compute the tanh of a scalar * \sa class CwiseUnaryOp, ArrayBase::tanh() diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index fcad3694e..e0bc1689d 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -341,6 +341,13 @@ #define EIGEN_HAVE_RVALUE_REFERENCES #endif +// Does the compiler support C99? +#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ + || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) +#define EIGEN_HAS_C99_MATH 1 +#endif + // Does the compiler support result_of? #if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)) #define EIGEN_HAS_STD_RESULT_OF 1 diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 108181419..1fe365aa7 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -96,7 +96,8 @@ STORAGE_LAYOUT_DOES_NOT_MATCH, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, - MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, + THIS_TYPE_IS_NOT_SUPPORTED }; }; diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index cb918860c..59c965e15 100755 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -666,7 +666,7 @@ void JacobiSVD::allocate(Index rows, Index cols, u if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this); if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this); - if(m_cols!=m_cols) m_scaledMatrix.resize(rows,cols); + if(m_rows!=m_cols) m_scaledMatrix.resize(rows,cols); } template diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 45e826b0c..01432e2f3 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -21,6 +21,9 @@ typedef CwiseUnaryOp, const Derived> AtanReturn typedef CwiseUnaryOp, const Derived> TanhReturnType; typedef CwiseUnaryOp, const Derived> SinhReturnType; typedef CwiseUnaryOp, const Derived> CoshReturnType; +typedef CwiseUnaryOp, const Derived> LgammaReturnType; +typedef CwiseUnaryOp, const Derived> ErfReturnType; +typedef CwiseUnaryOp, const Derived> ErfcReturnType; typedef CwiseUnaryOp, const Derived> PowReturnType; typedef CwiseUnaryOp, const Derived> SquareReturnType; typedef CwiseUnaryOp, const Derived> CubeReturnType; @@ -302,6 +305,47 @@ cosh() const return CoshReturnType(derived()); } +/** \returns an expression of the coefficient-wise ln(|gamma(*this)|). + * + * Example: \include Cwise_lgamma.cpp + * Output: \verbinclude Cwise_lgamma.out + * + * \sa cos(), sin(), tan() + */ +inline const LgammaReturnType +lgamma() const +{ + return LgammaReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise Gauss error + * function of *this. + * + * Example: \include Cwise_erf.cpp + * Output: \verbinclude Cwise_erf.out + * + * \sa cos(), sin(), tan() + */ +inline const ErfReturnType +erf() const +{ + return ErfReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise Complementary error + * function of *this. + * + * Example: \include Cwise_erfc.cpp + * Output: \verbinclude Cwise_erfc.out + * + * \sa cos(), sin(), tan() + */ +inline const ErfcReturnType +erfc() const +{ + return ErfcReturnType(derived()); +} + /** \returns an expression of the coefficient-wise power of *this to the given exponent. * * This function computes the coefficient-wise power. The function MatrixBase::pow() in the diff --git a/doc/PreprocessorDirectives.dox b/doc/PreprocessorDirectives.dox index 76ce2eb99..7cde1a36f 100644 --- a/doc/PreprocessorDirectives.dox +++ b/doc/PreprocessorDirectives.dox @@ -106,6 +106,7 @@ following macros are supported; none of them are defined by default. - \b EIGEN_MATRIX_PLUGIN - filename of plugin for extending the Matrix class. - \b EIGEN_MATRIXBASE_PLUGIN - filename of plugin for extending the MatrixBase class. - \b EIGEN_PLAINOBJECTBASE_PLUGIN - filename of plugin for extending the PlainObjectBase class. + - \b EIGEN_MAPBASE_PLUGIN - filename of plugin for extending the MapBase class. - \b EIGEN_QUATERNION_PLUGIN - filename of plugin for extending the Quaternion class. - \b EIGEN_QUATERNIONBASE_PLUGIN - filename of plugin for extending the QuaternionBase class. - \b EIGEN_SPARSEMATRIX_PLUGIN - filename of plugin for extending the SparseMatrix class. diff --git a/test/array.cpp b/test/array.cpp index 5395721f5..6adedfb06 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -202,7 +202,7 @@ template void array_real(const ArrayType& m) m2 = ArrayType::Random(rows, cols), m3(rows, cols), m4 = m1; - + m4 = (m4.abs()==Scalar(0)).select(1,m4); Scalar s1 = internal::random(); @@ -217,6 +217,11 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); +#ifdef EIGEN_HAS_C99_MATH + VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1)); + VERIFY_IS_APPROX(m1.erf(), erf(m1)); + VERIFY_IS_APPROX(m1.erfc(), erfc(m1)); +#endif // EIGEN_HAS_C99_MATH VERIFY_IS_APPROX(m1.arg(), arg(m1)); VERIFY_IS_APPROX(m1.round(), round(m1)); VERIFY_IS_APPROX(m1.floor(), floor(m1)); diff --git a/test/packetmath.cpp b/test/packetmath.cpp index f1826f0ef..e09a361bf 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -338,7 +338,7 @@ template void packetmath_real() data1[1] = 0; h.store(data2, internal::pexp(h.load(data1))); VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::epsilon()), data2[0]); - VERIFY_IS_EQUAL(std::exp(0), data2[1]); + VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]); data1[0] = (std::numeric_limits::min)(); data1[1] = -(std::numeric_limits::min)(); @@ -353,15 +353,43 @@ template void packetmath_real() VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::denorm_min()), data2[1]); } +#ifdef EIGEN_HAS_C99_MATH + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasLGamma,Packet> h; + h.store(data2, internal::plgamma(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasErf,Packet> h; + h.store(data2, internal::perf(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasErfc,Packet> h; + h.store(data2, internal::perfc(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } +#endif // EIGEN_HAS_C99_MATH + for (int i=0; i(0,1) * std::pow(Scalar(10), internal::random(-6,6)); data2[i] = internal::random(0,1) * std::pow(Scalar(10), internal::random(-6,6)); } + if(internal::random(0,1)<0.1) data1[internal::random(0, PacketSize)] = 0; CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); +#if defined(EIGEN_HAS_C99_MATH) && (__cplusplus > 199711L) + CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); + CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); + CHECK_CWISE1_IF(internal::packet_traits::HasErfc, std::erfc, internal::perfc); +#endif + if(PacketTraits::HasLog && PacketTraits::size>=2) { data1[0] = std::numeric_limits::quiet_NaN(); @@ -375,7 +403,7 @@ template void packetmath_real() data1[1] = 0; h.store(data2, internal::plog(h.load(data1))); VERIFY((numext::isnan)(data2[0])); - VERIFY_IS_EQUAL(std::log(0), data2[1]); + VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]); data1[0] = (std::numeric_limits::min)(); data1[1] = -(std::numeric_limits::min)(); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index d1ce3d0ed..392acf302 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -122,6 +122,24 @@ class TensorBase return unaryExpr(internal::scalar_tanh_op()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + lgamma() const { + return unaryExpr(internal::scalar_lgamma_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erf() const { + return unaryExpr(internal::scalar_erf_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erfc() const { + return unaryExpr(internal::scalar_erfc_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> sigmoid() const { diff --git a/unsupported/test/cxx11_tensor_cuda.cpp b/unsupported/test/cxx11_tensor_cuda.cpp index 5ff082a3a..49e1894ab 100644 --- a/unsupported/test/cxx11_tensor_cuda.cpp +++ b/unsupported/test/cxx11_tensor_cuda.cpp @@ -507,6 +507,115 @@ static void test_cuda_convolution_3d() } } + +template +void test_cuda_lgamma(const Scalar stddev) +{ + Tensor in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in(d_in, 72, 97); + Eigen::TensorMap > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.lgamma(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j))); + } + } +} + +template +void test_cuda_erf(const Scalar stddev) +{ + Tensor in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in(d_in, 72, 97); + Eigen::TensorMap > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.erf(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j))); + } + } +} + +template +void test_cuda_erfc(const Scalar stddev) +{ + Tensor in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in(d_in, 72, 97); + Eigen::TensorMap > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.erfc(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j))); + } + } +} + void test_cxx11_tensor_cuda() { CALL_SUBTEST(test_cuda_elementwise_small()); @@ -522,4 +631,34 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST(test_cuda_convolution_2d()); CALL_SUBTEST(test_cuda_convolution_3d()); CALL_SUBTEST(test_cuda_convolution_3d()); + CALL_SUBTEST(test_cuda_lgamma(1.0f)); + CALL_SUBTEST(test_cuda_lgamma(100.0f)); + CALL_SUBTEST(test_cuda_lgamma(0.01f)); + CALL_SUBTEST(test_cuda_lgamma(0.001f)); + CALL_SUBTEST(test_cuda_erf(1.0f)); + CALL_SUBTEST(test_cuda_erf(100.0f)); + CALL_SUBTEST(test_cuda_erf(0.01f)); + CALL_SUBTEST(test_cuda_erf(0.001f)); + CALL_SUBTEST(test_cuda_erfc(1.0f)); + // CALL_SUBTEST(test_cuda_erfc(100.0f)); + CALL_SUBTEST(test_cuda_erfc(5.0f)); // CUDA erfc lacks precision for large inputs + CALL_SUBTEST(test_cuda_erfc(0.01f)); + CALL_SUBTEST(test_cuda_erfc(0.001f)); + CALL_SUBTEST(test_cuda_tanh(1.0)); + CALL_SUBTEST(test_cuda_tanh(100.0)); + CALL_SUBTEST(test_cuda_tanh(0.01)); + CALL_SUBTEST(test_cuda_tanh(0.001)); + CALL_SUBTEST(test_cuda_lgamma(1.0)); + CALL_SUBTEST(test_cuda_lgamma(100.0)); + CALL_SUBTEST(test_cuda_lgamma(0.01)); + CALL_SUBTEST(test_cuda_lgamma(0.001)); + CALL_SUBTEST(test_cuda_erf(1.0)); + CALL_SUBTEST(test_cuda_erf(100.0)); + CALL_SUBTEST(test_cuda_erf(0.01)); + CALL_SUBTEST(test_cuda_erf(0.001)); + CALL_SUBTEST(test_cuda_erfc(1.0)); + // CALL_SUBTEST(test_cuda_erfc(100.0)); + CALL_SUBTEST(test_cuda_erfc(5.0)); // CUDA erfc lacks precision for large inputs + CALL_SUBTEST(test_cuda_erfc(0.01)); + CALL_SUBTEST(test_cuda_erfc(0.001)); }