mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-16 14:49:39 +08:00
Get rid of initialization logic for blueNorm by making the computed constants static const or constexpr.
Move macro definition EIGEN_CONSTEXPR to Core and make all methods in NumTraits constexpr when EIGEN_HASH_CONSTEXPR is 1.
This commit is contained in:
parent
14022f5eb5
commit
e55182ac09
@ -21,14 +21,14 @@ template< typename T,
|
||||
bool is_integer = NumTraits<T>::IsInteger>
|
||||
struct default_digits10_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static int run() { return std::numeric_limits<T>::digits10; }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct default_digits10_impl<T,false,false> // Floating point
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static int run() {
|
||||
using std::log10;
|
||||
using std::ceil;
|
||||
@ -40,7 +40,7 @@ struct default_digits10_impl<T,false,false> // Floating point
|
||||
template<typename T>
|
||||
struct default_digits10_impl<T,false,true> // Integer
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static int run() { return 0; }
|
||||
};
|
||||
|
||||
@ -52,14 +52,14 @@ template< typename T,
|
||||
bool is_integer = NumTraits<T>::IsInteger>
|
||||
struct default_digits_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static int run() { return std::numeric_limits<T>::digits; }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct default_digits_impl<T,false,false> // Floating point
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static int run() {
|
||||
using std::log;
|
||||
using std::ceil;
|
||||
@ -71,7 +71,7 @@ struct default_digits_impl<T,false,false> // Floating point
|
||||
template<typename T>
|
||||
struct default_digits_impl<T,false,true> // Integer
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static int run() { return 0; }
|
||||
};
|
||||
|
||||
@ -140,25 +140,25 @@ template<typename T> struct GenericNumTraits
|
||||
typedef T Nested;
|
||||
typedef T Literal;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline Real epsilon()
|
||||
{
|
||||
return numext::numeric_limits<T>::epsilon();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline int digits10()
|
||||
{
|
||||
return internal::default_digits10_impl<T>::run();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline int digits()
|
||||
{
|
||||
return internal::default_digits_impl<T>::run();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline Real dummy_precision()
|
||||
{
|
||||
// make sure to override this for floating-point types
|
||||
@ -166,23 +166,23 @@ template<typename T> struct GenericNumTraits
|
||||
}
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline T highest() {
|
||||
return (numext::numeric_limits<T>::max)();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline T lowest() {
|
||||
return IsInteger ? (numext::numeric_limits<T>::min)()
|
||||
: static_cast<T>(-(numext::numeric_limits<T>::max)());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline T infinity() {
|
||||
return numext::numeric_limits<T>::infinity();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline T quiet_NaN() {
|
||||
return numext::numeric_limits<T>::quiet_NaN();
|
||||
}
|
||||
@ -194,19 +194,20 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
|
||||
template<> struct NumTraits<float>
|
||||
: GenericNumTraits<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline float dummy_precision() { return 1e-5f; }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<double> : GenericNumTraits<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline double dummy_precision() { return 1e-12; }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<long double>
|
||||
: GenericNumTraits<long double>
|
||||
{
|
||||
EIGEN_CONSTEXPR
|
||||
static inline long double dummy_precision() { return 1e-15l; }
|
||||
};
|
||||
|
||||
@ -223,11 +224,11 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
|
||||
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline int digits10() { return NumTraits<Real>::digits10(); }
|
||||
};
|
||||
|
||||
@ -252,11 +253,12 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
||||
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
|
||||
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
|
||||
|
||||
EIGEN_CONSTEXPR
|
||||
static inline int digits10() { return NumTraits<Scalar>::digits10(); }
|
||||
};
|
||||
|
||||
@ -270,6 +272,7 @@ template<> struct NumTraits<std::string>
|
||||
MulCost = HugeCost
|
||||
};
|
||||
|
||||
EIGEN_CONSTEXPR
|
||||
static inline int digits10() { return 0; }
|
||||
|
||||
private:
|
||||
|
@ -127,63 +127,28 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
|
||||
using std::pow;
|
||||
using std::sqrt;
|
||||
using std::abs;
|
||||
|
||||
// This program calculates the machine-dependent constants
|
||||
// bl, b2, slm, s2m, relerr overfl
|
||||
// from the "basic" machine-dependent numbers
|
||||
// nbig, ibeta, it, iemin, iemax, rbig.
|
||||
// The following define the basic machine-dependent constants.
|
||||
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
|
||||
// are used. For any specific computer, each of the assignment
|
||||
// statements can be replaced
|
||||
static const int ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
|
||||
static const int it = NumTraits<RealScalar>::digits(); // number of base-beta digits in mantissa
|
||||
static const int iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
|
||||
static const int iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
|
||||
static const RealScalar rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
|
||||
static const RealScalar b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(-((1-iemin)/2)))); // lower boundary of midrange
|
||||
static const RealScalar b2 = RealScalar(pow(RealScalar(ibeta),RealScalar((iemax + 1 - it)/2))); // upper boundary of midrange
|
||||
static const RealScalar s1m = RealScalar(pow(RealScalar(ibeta),RealScalar((2-iemin)/2))); // scaling factor for lower range
|
||||
static const RealScalar s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(- ((iemax+it)/2)))); // scaling factor for upper range
|
||||
static const RealScalar eps = RealScalar(pow(double(ibeta), 1-it));
|
||||
static const RealScalar relerr = sqrt(eps); // tolerance for neglecting asml
|
||||
|
||||
const Derived& vec(_vec.derived());
|
||||
|
||||
#if EIGEN_HAS_CXX11_ATOMIC
|
||||
enum {
|
||||
kNotInitialized = 0,
|
||||
kInitializing = 1,
|
||||
kInitialized = 2
|
||||
};
|
||||
static std::atomic<uint8_t> initialized{kNotInitialized};
|
||||
#else
|
||||
// Note: This is not theadsafe without C++11.
|
||||
const bool kNotInitialized = false;
|
||||
const bool kInitialized = true;
|
||||
static bool initialized = kNotInitialized;
|
||||
#endif
|
||||
static RealScalar b1, b2, s1m, s2m, rbig, relerr;
|
||||
|
||||
while (initialized != kInitialized)
|
||||
{
|
||||
#if EIGEN_HAS_CXX11_ATOMIC
|
||||
// Checking again to see if another thread has already initialized the constants.
|
||||
uint8_t val = kNotInitialized;
|
||||
if (initialized.compare_exchange_strong(val, kInitializing) && val != kInitialized) {
|
||||
#endif
|
||||
int ibeta, it, iemin, iemax, iexp;
|
||||
RealScalar eps;
|
||||
// This program calculates the machine-dependent constants
|
||||
// bl, b2, slm, s2m, relerr overfl
|
||||
// from the "basic" machine-dependent numbers
|
||||
// nbig, ibeta, it, iemin, iemax, rbig.
|
||||
// The following define the basic machine-dependent constants.
|
||||
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
|
||||
// are used. For any specific computer, each of the assignment
|
||||
// statements can be replaced
|
||||
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
|
||||
it = NumTraits<RealScalar>::digits(); // number of base-beta digits in mantissa
|
||||
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
|
||||
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
|
||||
rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
|
||||
|
||||
iexp = -((1-iemin)/2);
|
||||
b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
|
||||
iexp = (iemax + 1 - it)/2;
|
||||
b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
|
||||
|
||||
iexp = (2-iemin)/2;
|
||||
s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
|
||||
iexp = - ((iemax+it)/2);
|
||||
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
|
||||
|
||||
eps = RealScalar(pow(double(ibeta), 1-it));
|
||||
relerr = sqrt(eps); // tolerance for neglecting asml
|
||||
initialized = kInitialized;
|
||||
#if EIGEN_HAS_CXX11_ATOMIC
|
||||
}
|
||||
#endif
|
||||
}
|
||||
Index n = vec.size();
|
||||
RealScalar ab2 = b2 / RealScalar(n);
|
||||
RealScalar asml = RealScalar(0);
|
||||
|
@ -31,12 +31,12 @@ namespace bfloat16_impl {
|
||||
|
||||
// Make our own __bfloat16_raw definition.
|
||||
struct __bfloat16_raw {
|
||||
EIGEN_DEVICE_FUNC __bfloat16_raw() : value(0) {}
|
||||
explicit EIGEN_DEVICE_FUNC __bfloat16_raw(unsigned short raw) : value(raw) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw() : value(0) {}
|
||||
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {}
|
||||
unsigned short value;
|
||||
};
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value);
|
||||
template <bool AssumeArgumentIsNormalOrInfinityOrZero>
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff);
|
||||
// Forward declarations of template specializations, to avoid Visual C++ 2019 errors, saying:
|
||||
@ -48,8 +48,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h);
|
||||
|
||||
struct bfloat16_base : public __bfloat16_raw {
|
||||
EIGEN_DEVICE_FUNC bfloat16_base() {}
|
||||
EIGEN_DEVICE_FUNC bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base() {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw& h) : __bfloat16_raw(h) {}
|
||||
};
|
||||
|
||||
} // namespace bfloat16_impl
|
||||
@ -59,15 +59,15 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
|
||||
|
||||
typedef bfloat16_impl::__bfloat16_raw __bfloat16_raw;
|
||||
|
||||
EIGEN_DEVICE_FUNC bfloat16() {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16() {}
|
||||
|
||||
EIGEN_DEVICE_FUNC bfloat16(const __bfloat16_raw& h) : bfloat16_impl::bfloat16_base(h) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const __bfloat16_raw& h) : bfloat16_impl::bfloat16_base(h) {}
|
||||
|
||||
explicit EIGEN_DEVICE_FUNC bfloat16(bool b)
|
||||
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(bool b)
|
||||
: bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {}
|
||||
|
||||
template<class T>
|
||||
explicit EIGEN_DEVICE_FUNC bfloat16(const T& val)
|
||||
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const T& val)
|
||||
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
|
||||
|
||||
explicit EIGEN_DEVICE_FUNC bfloat16(float f)
|
||||
@ -76,7 +76,7 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
|
||||
// Following the convention of numpy, converting between complex and
|
||||
// float will lead to loss of imag value.
|
||||
template<typename RealScalar>
|
||||
explicit EIGEN_DEVICE_FUNC bfloat16(const std::complex<RealScalar>& val)
|
||||
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val)
|
||||
: bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC operator float() const { // NOLINT: Allow implicit conversion to float, because it is lossless.
|
||||
@ -272,10 +272,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const
|
||||
return output;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value) {
|
||||
__bfloat16_raw h;
|
||||
h.value = value;
|
||||
return h;
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value) {
|
||||
return __bfloat16_raw(value);
|
||||
}
|
||||
|
||||
// float_to_bfloat16_rtne template specialization that does not make any
|
||||
@ -619,20 +617,23 @@ template<> struct NumTraits<Eigen::bfloat16>
|
||||
RequireInitialization = false
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 epsilon() {
|
||||
return bfloat16_impl::raw_uint16_to_bfloat16(0x3c00);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() { return Eigen::bfloat16(5e-2f); }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 dummy_precision() {
|
||||
return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D); // bfloat16(5e-2f);
|
||||
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 highest() {
|
||||
return bfloat16_impl::raw_uint16_to_bfloat16(0x7F7F);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 lowest() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 lowest() {
|
||||
return bfloat16_impl::raw_uint16_to_bfloat16(0xFF7F);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 infinity() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 infinity() {
|
||||
return bfloat16_impl::raw_uint16_to_bfloat16(0x7f80);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::bfloat16 quiet_NaN() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::bfloat16 quiet_NaN() {
|
||||
return bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0);
|
||||
}
|
||||
};
|
||||
|
@ -67,8 +67,8 @@ namespace half_impl {
|
||||
#if !defined(EIGEN_HAS_GPU_FP16)
|
||||
// Make our own __half_raw definition that is similar to CUDA's.
|
||||
struct __half_raw {
|
||||
EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
|
||||
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw() : x(0) {}
|
||||
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw(unsigned short raw) : x(raw) {}
|
||||
unsigned short x;
|
||||
};
|
||||
#elif defined(EIGEN_HAS_HIP_FP16)
|
||||
@ -85,20 +85,20 @@ typedef cl::sycl::half __half_raw;
|
||||
|
||||
#endif
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(unsigned short x);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);
|
||||
|
||||
struct half_base : public __half_raw {
|
||||
EIGEN_DEVICE_FUNC half_base() {}
|
||||
EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base() {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half_raw& h) : __half_raw(h) {}
|
||||
|
||||
#if defined(EIGEN_HAS_GPU_FP16)
|
||||
#if defined(EIGEN_HAS_HIP_FP16)
|
||||
EIGEN_DEVICE_FUNC half_base(const __half& h) { x = __half_as_ushort(h); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half& h) { x = __half_as_ushort(h); }
|
||||
#elif defined(EIGEN_HAS_CUDA_FP16)
|
||||
#if (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000)
|
||||
EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
@ -125,22 +125,22 @@ struct half : public half_impl::half_base {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC half() {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half() {}
|
||||
|
||||
EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half_raw& h) : half_impl::half_base(h) {}
|
||||
|
||||
#if defined(EIGEN_HAS_GPU_FP16)
|
||||
#if defined(EIGEN_HAS_HIP_FP16)
|
||||
EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half& h) : half_impl::half_base(h) {}
|
||||
#elif defined(EIGEN_HAS_CUDA_FP16)
|
||||
#if defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER >= 90000
|
||||
EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(const __half& h) : half_impl::half_base(h) {}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
explicit EIGEN_DEVICE_FUNC half(bool b)
|
||||
explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR half(bool b)
|
||||
: half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
|
||||
template<class T>
|
||||
explicit EIGEN_DEVICE_FUNC half(const T& val)
|
||||
@ -417,10 +417,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
|
||||
// these in hardware. If we need more performance on older/other CPUs, they are
|
||||
// also possible to vectorize directly.
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) {
|
||||
__half_raw h;
|
||||
h.x = x;
|
||||
return h;
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(unsigned short x) {
|
||||
return __half_raw(x);
|
||||
}
|
||||
|
||||
union float32_bits {
|
||||
@ -666,20 +664,22 @@ template<> struct NumTraits<Eigen::half>
|
||||
RequireInitialization = false
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half epsilon() {
|
||||
return half_impl::raw_uint16_to_half(0x0800);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half(1e-2f); }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half dummy_precision() {
|
||||
return half_impl::raw_uint16_to_half(0x211f); // Eigen::half(1e-2f);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half highest() {
|
||||
return half_impl::raw_uint16_to_half(0x7bff);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half lowest() {
|
||||
return half_impl::raw_uint16_to_half(0xfbff);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half infinity() {
|
||||
return half_impl::raw_uint16_to_half(0x7c00);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
|
||||
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
|
||||
return half_impl::raw_uint16_to_half(0x7c01);
|
||||
}
|
||||
};
|
||||
|
@ -696,6 +696,12 @@
|
||||
|
||||
#endif // EIGEN_HAS_CONSTEXPR
|
||||
|
||||
#if EIGEN_HAS_CONSTEXPR
|
||||
#define EIGEN_CONSTEXPR constexpr
|
||||
#else
|
||||
#define EIGEN_CONSTEXPR
|
||||
#endif
|
||||
|
||||
// Does the compiler support C++11 math?
|
||||
// Let's be conservative and enable the default C++11 implementation only if we are sure it exists
|
||||
#ifndef EIGEN_HAS_CXX11_MATH
|
||||
|
@ -78,7 +78,7 @@ typedef mp::number<mp::cpp_dec_float<100>, mp::et_on> Real;
|
||||
|
||||
namespace Eigen {
|
||||
template<> struct NumTraits<Real> : GenericNumTraits<Real> {
|
||||
static inline Real dummy_precision() { return 1e-50; }
|
||||
static EIGEN_CONSTEXPR inline Real dummy_precision() { return 1e-50; }
|
||||
};
|
||||
|
||||
template<typename T1,typename T2,typename T3,typename T4,typename T5>
|
||||
@ -205,4 +205,3 @@ EIGEN_DECLARE_TEST(boostmultiprec)
|
||||
|
||||
CALL_SUBTEST_11(( test_simplicial_cholesky_T<Real,int>() ));
|
||||
}
|
||||
|
||||
|
@ -75,10 +75,10 @@ template<Index n> struct NumTraits<type2index<n> >
|
||||
MulCost = 1
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real epsilon() { return 0; }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real dummy_precision() { return 0; }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real highest() { return n; }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real lowest() { return n; }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR EIGEN_STRONG_INLINE Real epsilon() { return 0; }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR EIGEN_STRONG_INLINE Real dummy_precision() { return 0; }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR EIGEN_STRONG_INLINE Real highest() { return n; }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR EIGEN_STRONG_INLINE Real lowest() { return n; }
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
@ -44,13 +44,6 @@
|
||||
typename internal::enable_if< ( __condition__ ) , int >::type = 0
|
||||
|
||||
|
||||
#if EIGEN_HAS_CONSTEXPR
|
||||
#define EIGEN_CONSTEXPR constexpr
|
||||
#else
|
||||
#define EIGEN_CONSTEXPR
|
||||
#endif
|
||||
|
||||
|
||||
#if EIGEN_OS_WIN || EIGEN_OS_WIN64
|
||||
#define EIGEN_SLEEP(n) Sleep(n)
|
||||
#elif EIGEN_OS_GNULINUX
|
||||
|
Loading…
x
Reference in New Issue
Block a user