mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 01:29:35 +08:00
Merged eigen/eigen into default
This commit is contained in:
commit
5a59452aae
@ -216,7 +216,7 @@ pandnot(const Packet& a, const Packet& b) { return a & (~b); }
|
|||||||
|
|
||||||
/** \internal \returns ones */
|
/** \internal \returns ones */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
ptrue(const Packet& /*a*/) { Packet b; memset(&b, 0xff, sizeof(b)); return b;}
|
ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;}
|
||||||
|
|
||||||
template <typename RealScalar>
|
template <typename RealScalar>
|
||||||
EIGEN_DEVICE_FUNC inline std::complex<RealScalar> ptrue(const std::complex<RealScalar>& /*a*/) {
|
EIGEN_DEVICE_FUNC inline std::complex<RealScalar> ptrue(const std::complex<RealScalar>& /*a*/) {
|
||||||
|
@ -308,18 +308,18 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<
|
|||||||
template<> EIGEN_DEVICE_FUNC inline Packet4cd pgather<std::complex<double>, Packet4cd>(const std::complex<double>* from, Index stride)
|
template<> EIGEN_DEVICE_FUNC inline Packet4cd pgather<std::complex<double>, Packet4cd>(const std::complex<double>* from, Index stride)
|
||||||
{
|
{
|
||||||
return Packet4cd(_mm512_insertf64x4(_mm512_castpd256_pd512(
|
return Packet4cd(_mm512_insertf64x4(_mm512_castpd256_pd512(
|
||||||
_mm256_insertf128_pd(_mm256_castpd128_pd256(pload<Packet1cd>(from+0*stride).v), pload<Packet1cd>(from+1*stride).v,1)),
|
_mm256_insertf128_pd(_mm256_castpd128_pd256(ploadu<Packet1cd>(from+0*stride).v), ploadu<Packet1cd>(from+1*stride).v,1)),
|
||||||
_mm256_insertf128_pd(_mm256_castpd128_pd256(pload<Packet1cd>(from+2*stride).v), pload<Packet1cd>(from+3*stride).v,1), 1));
|
_mm256_insertf128_pd(_mm256_castpd128_pd256(ploadu<Packet1cd>(from+2*stride).v), ploadu<Packet1cd>(from+3*stride).v,1), 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet4cd>(std::complex<double>* to, const Packet4cd& from, Index stride)
|
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet4cd>(std::complex<double>* to, const Packet4cd& from, Index stride)
|
||||||
{
|
{
|
||||||
__m512i fromi = _mm512_castpd_si512(from.v);
|
__m512i fromi = _mm512_castpd_si512(from.v);
|
||||||
double* tod = (double*)(void*)to;
|
double* tod = (double*)(void*)to;
|
||||||
_mm_store_pd(tod+0*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,0)) );
|
_mm_storeu_pd(tod+0*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,0)) );
|
||||||
_mm_store_pd(tod+2*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,1)) );
|
_mm_storeu_pd(tod+2*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,1)) );
|
||||||
_mm_store_pd(tod+4*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,2)) );
|
_mm_storeu_pd(tod+4*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,2)) );
|
||||||
_mm_store_pd(tod+6*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,3)) );
|
_mm_storeu_pd(tod+6*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,3)) );
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet4cd>(const Packet4cd& a)
|
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet4cd>(const Packet4cd& a)
|
||||||
|
@ -57,7 +57,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasBlend = 0,
|
HasBlend = 0,
|
||||||
HasSin = EIGEN_FAST_MATH,
|
HasSin = EIGEN_FAST_MATH,
|
||||||
HasCos = EIGEN_FAST_MATH,
|
HasCos = EIGEN_FAST_MATH,
|
||||||
#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG
|
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
|
||||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
#endif
|
#endif
|
||||||
@ -77,7 +77,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
|||||||
AlignedOnScalar = 1,
|
AlignedOnScalar = 1,
|
||||||
size = 8,
|
size = 8,
|
||||||
HasHalfPacket = 1,
|
HasHalfPacket = 1,
|
||||||
#if EIGEN_GNUC_AT_LEAST(5, 3)
|
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
|
||||||
HasSqrt = EIGEN_FAST_MATH,
|
HasSqrt = EIGEN_FAST_MATH,
|
||||||
HasRsqrt = EIGEN_FAST_MATH,
|
HasRsqrt = EIGEN_FAST_MATH,
|
||||||
#endif
|
#endif
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
//
|
//
|
||||||
// Copyright (C) 2007 Julien Pommier
|
// Copyright (C) 2007 Julien Pommier
|
||||||
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
|
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
|
||||||
// Copyright (C) 2009-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
|
// Copyright (C) 2009-2019 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||||
//
|
//
|
||||||
// This Source Code Form is subject to the terms of the Mozilla
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
@ -253,15 +253,68 @@ Packet pexp_double(const Packet _x)
|
|||||||
return pmax(pldexp(x,fx), _x);
|
return pmax(pldexp(x,fx), _x);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The code is the rewriting of the cephes sinf/cosf functions.
|
// The following code is inspired by the following stack-overflow answer:
|
||||||
Precision is excellent as long as x < 8192 (I did not bother to
|
// https://stackoverflow.com/questions/30463616/payne-hanek-algorithm-implementation-in-c/30465751#30465751
|
||||||
take into account the special handling they have for greater values
|
// It has been largely optimized:
|
||||||
-- it does not return garbage for arguments over 8192, though, but
|
// - By-pass calls to frexp.
|
||||||
the extra precision is missing).
|
// - Aligned loads of required 96 bits of 2/pi. This is accomplished by
|
||||||
|
// (1) balancing the mantissa and exponent so that the required bits of 2/pi are
|
||||||
|
// aligned on 8-bits, and (2) replicating the storage of the bits of 2/pi.
|
||||||
|
// - Avoid a branch in rounding and extraction of the remaining fractional part.
|
||||||
|
// Overall, I measured a speed up higher than x2 on x86-64.
|
||||||
|
inline float trig_reduce_huge (float xf, int *quadrant)
|
||||||
|
{
|
||||||
|
using Eigen::numext::int32_t;
|
||||||
|
using Eigen::numext::uint32_t;
|
||||||
|
using Eigen::numext::int64_t;
|
||||||
|
using Eigen::numext::uint64_t;
|
||||||
|
|
||||||
Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
|
const double pio2_62 = 3.4061215800865545e-19; // pi/2 * 2^-62
|
||||||
surprising but correct result.
|
const uint64_t zero_dot_five = uint64_t(1) << 61; // 0.5 in 2.62-bit fixed-point format
|
||||||
*/
|
|
||||||
|
// 192 bits of 2/pi for Payne-Hanek reduction
|
||||||
|
// Bits are introduced in packets of 8 to enable aligned reads.
|
||||||
|
static const uint32_t two_over_pi [] =
|
||||||
|
{
|
||||||
|
0x00000028, 0x000028be, 0x0028be60, 0x28be60db,
|
||||||
|
0xbe60db93, 0x60db9391, 0xdb939105, 0x9391054a,
|
||||||
|
0x91054a7f, 0x054a7f09, 0x4a7f09d5, 0x7f09d5f4,
|
||||||
|
0x09d5f47d, 0xd5f47d4d, 0xf47d4d37, 0x7d4d3770,
|
||||||
|
0x4d377036, 0x377036d8, 0x7036d8a5, 0x36d8a566,
|
||||||
|
0xd8a5664f, 0xa5664f10, 0x664f10e4, 0x4f10e410,
|
||||||
|
0x10e41000, 0xe4100000
|
||||||
|
};
|
||||||
|
|
||||||
|
uint32_t xi = numext::as_uint(xf);
|
||||||
|
// Below, -118 = -126 + 8.
|
||||||
|
// -126 is to get the exponent,
|
||||||
|
// +8 is to enable alignment of 2/pi's bits on 8 bits.
|
||||||
|
// This is possible because the fractional part of x has only 24 meaningful bits.
|
||||||
|
uint32_t e = (xi >> 23) - 118;
|
||||||
|
// Extract the mantissa and shift it to align it wrt the exponent
|
||||||
|
xi = ((xi & 0x007fffffu)| 0x00800000u) << (e & 0x7);
|
||||||
|
|
||||||
|
uint32_t i = e >> 3;
|
||||||
|
uint32_t twoopi_1 = two_over_pi[i-1];
|
||||||
|
uint32_t twoopi_2 = two_over_pi[i+3];
|
||||||
|
uint32_t twoopi_3 = two_over_pi[i+7];
|
||||||
|
|
||||||
|
// Compute x * 2/pi in 2.62-bit fixed-point format.
|
||||||
|
uint64_t p;
|
||||||
|
p = uint64_t(xi) * twoopi_3;
|
||||||
|
p = uint64_t(xi) * twoopi_2 + (p >> 32);
|
||||||
|
p = (uint64_t(xi * twoopi_1) << 32) + p;
|
||||||
|
|
||||||
|
// Round to nearest: add 0.5 and extract integral part.
|
||||||
|
uint64_t q = (p + zero_dot_five) >> 62;
|
||||||
|
*quadrant = int(q);
|
||||||
|
// Now it remains to compute "r = x - q*pi/2" with high accuracy,
|
||||||
|
// since we have p=x/(pi/2) with high accuracy, we can more efficiently compute r as:
|
||||||
|
// r = (p-q)*pi/2,
|
||||||
|
// where the product can be carried out with sufficient accuracy using double precision.
|
||||||
|
p -= q<<62;
|
||||||
|
return float(double(int64_t(p)) * pio2_62);
|
||||||
|
}
|
||||||
|
|
||||||
template<bool ComputeSine,typename Packet>
|
template<bool ComputeSine,typename Packet>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
@ -285,17 +338,6 @@ Packet psincos_float(const Packet& _x)
|
|||||||
PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24)
|
PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24)
|
||||||
y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi
|
y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi
|
||||||
|
|
||||||
// Compute the sign to apply to the polynomial.
|
|
||||||
// sin: sign = second_bit(y_int) xor signbit(_x)
|
|
||||||
// cos: sign = second_bit(y_int+1)
|
|
||||||
Packet sign_bit = ComputeSine ? pxor(_x, preinterpret<Packet>(pshiftleft<30>(y_int)))
|
|
||||||
: preinterpret<Packet>(pshiftleft<30>(padd(y_int,csti_1)));
|
|
||||||
sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit
|
|
||||||
|
|
||||||
// Get the polynomial selection mask from the second bit of y_int
|
|
||||||
// We'll calculate both (sin and cos) polynomials and then select from the two.
|
|
||||||
Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_int, csti_1), pzero(y_int)));
|
|
||||||
|
|
||||||
// Reduce x by y octants to get: -Pi/4 <= x <= +Pi/4
|
// Reduce x by y octants to get: -Pi/4 <= x <= +Pi/4
|
||||||
// using "Extended precision modular arithmetic"
|
// using "Extended precision modular arithmetic"
|
||||||
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
|
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
|
||||||
@ -332,29 +374,36 @@ Packet psincos_float(const Packet& _x)
|
|||||||
// The coefficients are: 0xbfc90f80, 0xb7354480, 0x2e74b9ee
|
// The coefficients are: 0xbfc90f80, 0xb7354480, 0x2e74b9ee
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// We use huge_vals as a temporary for abs(_x) to ensure huge_vals
|
if(predux_any(pcmp_le(pset1<Packet>(huge_th),pabs(_x))))
|
||||||
// is fully initialized for the last pselect(). (prevent compiler warning)
|
|
||||||
Packet huge_vals = pabs(_x);
|
|
||||||
Packet huge_mask = pcmp_le(pset1<Packet>(huge_th),huge_vals);
|
|
||||||
|
|
||||||
if(predux_any(huge_mask))
|
|
||||||
{
|
{
|
||||||
const int PacketSize = unpacket_traits<Packet>::size;
|
const int PacketSize = unpacket_traits<Packet>::size;
|
||||||
#if EIGEN_HAS_CXX11
|
|
||||||
alignas(Packet) float vals[PacketSize];
|
|
||||||
#else
|
|
||||||
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float vals[PacketSize];
|
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float vals[PacketSize];
|
||||||
#endif
|
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float x_cpy[PacketSize];
|
||||||
pstoreu(vals, _x);
|
EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) int y_int2[PacketSize];
|
||||||
for(int k=0; k<PacketSize;++k) {
|
pstoreu(vals, pabs(_x));
|
||||||
|
pstoreu(x_cpy, x);
|
||||||
|
pstoreu(y_int2, y_int);
|
||||||
|
for(int k=0; k<PacketSize;++k)
|
||||||
|
{
|
||||||
float val = vals[k];
|
float val = vals[k];
|
||||||
if(numext::abs(val)>=huge_th) {
|
if(val>=huge_th && (numext::isfinite)(val))
|
||||||
vals[k] = ComputeSine ? std::sin(val) : std::cos(val);
|
x_cpy[k] = trig_reduce_huge(val,&y_int2[k]);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
huge_vals = ploadu<Packet>(vals);
|
x = ploadu<Packet>(x_cpy);
|
||||||
|
y_int = ploadu<PacketI>(y_int2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute the sign to apply to the polynomial.
|
||||||
|
// sin: sign = second_bit(y_int) xor signbit(_x)
|
||||||
|
// cos: sign = second_bit(y_int+1)
|
||||||
|
Packet sign_bit = ComputeSine ? pxor(_x, preinterpret<Packet>(pshiftleft<30>(y_int)))
|
||||||
|
: preinterpret<Packet>(pshiftleft<30>(padd(y_int,csti_1)));
|
||||||
|
sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit
|
||||||
|
|
||||||
|
// Get the polynomial selection mask from the second bit of y_int
|
||||||
|
// We'll calculate both (sin and cos) polynomials and then select from the two.
|
||||||
|
Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_int, csti_1), pzero(y_int)));
|
||||||
|
|
||||||
Packet x2 = pmul(x,x);
|
Packet x2 = pmul(x,x);
|
||||||
|
|
||||||
// Evaluate the cos(x) polynomial. (-Pi/4 <= x <= Pi/4)
|
// Evaluate the cos(x) polynomial. (-Pi/4 <= x <= Pi/4)
|
||||||
@ -383,7 +432,7 @@ Packet psincos_float(const Packet& _x)
|
|||||||
: pselect(poly_mask,y1,y2);
|
: pselect(poly_mask,y1,y2);
|
||||||
|
|
||||||
// Update the sign and filter huge inputs
|
// Update the sign and filter huge inputs
|
||||||
return pselect(huge_mask, huge_vals, pxor(y, sign_bit));
|
return pxor(y, sign_bit);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Packet>
|
template<typename Packet>
|
||||||
|
@ -29,10 +29,15 @@
|
|||||||
*
|
*
|
||||||
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
|
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
|
||||||
* vectorized and non-vectorized code.
|
* vectorized and non-vectorized code.
|
||||||
|
*
|
||||||
|
* FIXME: this code can be cleaned up once we switch to proper C++11 only.
|
||||||
*/
|
*/
|
||||||
#if (defined EIGEN_CUDACC)
|
#if (defined EIGEN_CUDACC)
|
||||||
#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
|
#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
|
||||||
#define EIGEN_ALIGNOF(x) __alignof(x)
|
#define EIGEN_ALIGNOF(x) __alignof(x)
|
||||||
|
#elif EIGEN_HAS_ALIGNAS
|
||||||
|
#define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
|
||||||
|
#define EIGEN_ALIGNOF(x) alignof(x)
|
||||||
#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
|
#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
|
||||||
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
|
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
|
||||||
#define EIGEN_ALIGNOF(x) __alignof(x)
|
#define EIGEN_ALIGNOF(x) __alignof(x)
|
||||||
@ -44,7 +49,7 @@
|
|||||||
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
|
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
|
||||||
#define EIGEN_ALIGNOF(x) __alignof(x)
|
#define EIGEN_ALIGNOF(x) __alignof(x)
|
||||||
#else
|
#else
|
||||||
#error Please tell me what is the equivalent of __attribute__((aligned(n))) and __alignof(x) for your compiler
|
#error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// If the user explicitly disable vectorization, then we also disable alignment
|
// If the user explicitly disable vectorization, then we also disable alignment
|
||||||
|
@ -129,16 +129,21 @@
|
|||||||
#define EIGEN_COMP_MSVC_STRICT 0
|
#define EIGEN_COMP_MSVC_STRICT 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/// \internal EIGEN_COMP_IBM set to 1 if the compiler is IBM XL C++
|
/// \internal EIGEN_COMP_IBM set to xlc version if the compiler is IBM XL C++
|
||||||
#if defined(__IBMCPP__) || defined(__xlc__)
|
// XLC version
|
||||||
#define EIGEN_COMP_IBM 1
|
// 3.1 0x0301
|
||||||
|
// 4.5 0x0405
|
||||||
|
// 5.0 0x0500
|
||||||
|
// 12.1 0x0C01
|
||||||
|
#if defined(__IBMCPP__) || defined(__xlc__) || defined(__ibmxl__)
|
||||||
|
#define EIGEN_COMP_IBM __xlC__
|
||||||
#else
|
#else
|
||||||
#define EIGEN_COMP_IBM 0
|
#define EIGEN_COMP_IBM 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/// \internal EIGEN_COMP_PGI set to 1 if the compiler is Portland Group Compiler
|
/// \internal EIGEN_COMP_PGI set to PGI version if the compiler is Portland Group Compiler
|
||||||
#if defined(__PGI)
|
#if defined(__PGI)
|
||||||
#define EIGEN_COMP_PGI 1
|
#define EIGEN_COMP_PGI (__PGIC__*100+__PGIC_MINOR__)
|
||||||
#else
|
#else
|
||||||
#define EIGEN_COMP_PGI 0
|
#define EIGEN_COMP_PGI 0
|
||||||
#endif
|
#endif
|
||||||
@ -347,9 +352,17 @@
|
|||||||
#define EIGEN_OS_WIN_STRICT 0
|
#define EIGEN_OS_WIN_STRICT 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/// \internal EIGEN_OS_SUN set to 1 if the OS is SUN
|
/// \internal EIGEN_OS_SUN set to __SUNPRO_C if the OS is SUN
|
||||||
|
// compiler solaris __SUNPRO_C
|
||||||
|
// version studio
|
||||||
|
// 5.7 10 0x570
|
||||||
|
// 5.8 11 0x580
|
||||||
|
// 5.9 12 0x590
|
||||||
|
// 5.10 12.1 0x5100
|
||||||
|
// 5.11 12.2 0x5110
|
||||||
|
// 5.12 12.3 0x5120
|
||||||
#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
|
#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
|
||||||
#define EIGEN_OS_SUN 1
|
#define EIGEN_OS_SUN __SUNPRO_C
|
||||||
#else
|
#else
|
||||||
#define EIGEN_OS_SUN 0
|
#define EIGEN_OS_SUN 0
|
||||||
#endif
|
#endif
|
||||||
@ -546,6 +559,22 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef EIGEN_HAS_ALIGNAS
|
||||||
|
#if EIGEN_MAX_CPP_VER>=11 && EIGEN_HAS_CXX11 && \
|
||||||
|
( __has_feature(cxx_alignas) \
|
||||||
|
|| EIGEN_HAS_CXX14 \
|
||||||
|
|| (EIGEN_COMP_MSVC >= 1800) \
|
||||||
|
|| (EIGEN_GNUC_AT_LEAST(4,8)) \
|
||||||
|
|| (EIGEN_COMP_CLANG>=305) \
|
||||||
|
|| (EIGEN_COMP_ICC>=1500) \
|
||||||
|
|| (EIGEN_COMP_PGI>=1500) \
|
||||||
|
|| (EIGEN_COMP_SUN>=0x5130))
|
||||||
|
#define EIGEN_HAS_ALIGNAS 1
|
||||||
|
#else
|
||||||
|
#define EIGEN_HAS_ALIGNAS 0
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
// Does the compiler support type_traits?
|
// Does the compiler support type_traits?
|
||||||
// - full support of type traits was added only to GCC 5.1.0.
|
// - full support of type traits was added only to GCC 5.1.0.
|
||||||
// - 20150626 corresponds to the last release of 4.x libstdc++
|
// - 20150626 corresponds to the last release of 4.x libstdc++
|
||||||
|
@ -636,8 +636,41 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
|
|||||||
bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
|
bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/** \internal extract the bits of the float \a x */
|
||||||
|
inline unsigned int as_uint(float x)
|
||||||
|
{
|
||||||
|
unsigned int ret;
|
||||||
|
std::memcpy(&ret, &x, sizeof(float));
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace numext
|
} // end namespace numext
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
// Define portable (u)int{32,64} types
|
||||||
|
#if EIGEN_HAS_CXX11
|
||||||
|
#include <cstdint>
|
||||||
|
namespace Eigen {
|
||||||
|
namespace numext {
|
||||||
|
typedef std::uint32_t uint32_t;
|
||||||
|
typedef std::int32_t int32_t;
|
||||||
|
typedef std::uint64_t uint64_t;
|
||||||
|
typedef std::int64_t int64_t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// Without c++11, all compilers able to compile Eigen also
|
||||||
|
// provide the C99 stdint.h header file.
|
||||||
|
#include <stdint.h>
|
||||||
|
namespace Eigen {
|
||||||
|
namespace numext {
|
||||||
|
typedef ::uint32_t uint32_t;
|
||||||
|
typedef ::int32_t int32_t;
|
||||||
|
typedef ::uint64_t uint64_t;
|
||||||
|
typedef ::int64_t int64_t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // EIGEN_META_H
|
#endif // EIGEN_META_H
|
||||||
|
@ -97,6 +97,9 @@ template<int Mode> struct transform_make_affine;
|
|||||||
* - #AffineCompact: the transformation is stored as a (Dim)x(Dim+1) matrix.
|
* - #AffineCompact: the transformation is stored as a (Dim)x(Dim+1) matrix.
|
||||||
* - #Projective: the transformation is stored as a (Dim+1)^2 matrix
|
* - #Projective: the transformation is stored as a (Dim+1)^2 matrix
|
||||||
* without any assumption.
|
* without any assumption.
|
||||||
|
* - #Isometry: same as #Affine with the additional assumption that
|
||||||
|
* the linear part represents a rotation. This assumption is exploited
|
||||||
|
* to speed up some functions such as inverse() and rotation().
|
||||||
* \tparam _Options has the same meaning as in class Matrix. It allows to specify DontAlign and/or RowMajor.
|
* \tparam _Options has the same meaning as in class Matrix. It allows to specify DontAlign and/or RowMajor.
|
||||||
* These Options are passed directly to the underlying matrix type.
|
* These Options are passed directly to the underlying matrix type.
|
||||||
*
|
*
|
||||||
@ -252,11 +255,11 @@ protected:
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
/** Default constructor without initialization of the meaningful coefficients.
|
/** Default constructor without initialization of the meaningful coefficients.
|
||||||
* If Mode==Affine, then the last row is set to [0 ... 0 1] */
|
* If Mode==Affine or Mode==Isometry, then the last row is set to [0 ... 0 1] */
|
||||||
EIGEN_DEVICE_FUNC inline Transform()
|
EIGEN_DEVICE_FUNC inline Transform()
|
||||||
{
|
{
|
||||||
check_template_params();
|
check_template_params();
|
||||||
internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix);
|
internal::transform_make_affine<(int(Mode)==Affine || int(Mode)==Isometry) ? Affine : AffineCompact>::run(m_matrix);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline Transform(const Transform& other)
|
EIGEN_DEVICE_FUNC inline Transform(const Transform& other)
|
||||||
|
@ -18,6 +18,7 @@ template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
|
|||||||
{
|
{
|
||||||
typedef MatrixXpr XprKind;
|
typedef MatrixXpr XprKind;
|
||||||
typedef SolverStorage StorageKind;
|
typedef SolverStorage StorageKind;
|
||||||
|
typedef int StorageIndex;
|
||||||
enum { Flags = 0 };
|
enum { Flags = 0 };
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -64,7 +65,6 @@ template<typename _MatrixType> class FullPivLU
|
|||||||
typedef SolverBase<FullPivLU> Base;
|
typedef SolverBase<FullPivLU> Base;
|
||||||
|
|
||||||
EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU)
|
EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU)
|
||||||
// FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
|
|
||||||
enum {
|
enum {
|
||||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
|
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
|
||||||
@ -529,8 +529,8 @@ void FullPivLU<MatrixType>::computeInPlace()
|
|||||||
m_nonzero_pivots = k;
|
m_nonzero_pivots = k;
|
||||||
for(Index i = k; i < size; ++i)
|
for(Index i = k; i < size; ++i)
|
||||||
{
|
{
|
||||||
m_rowsTranspositions.coeffRef(i) = i;
|
m_rowsTranspositions.coeffRef(i) = internal::convert_index<StorageIndex>(i);
|
||||||
m_colsTranspositions.coeffRef(i) = i;
|
m_colsTranspositions.coeffRef(i) = internal::convert_index<StorageIndex>(i);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -541,8 +541,8 @@ void FullPivLU<MatrixType>::computeInPlace()
|
|||||||
// Now that we've found the pivot, we need to apply the row/col swaps to
|
// Now that we've found the pivot, we need to apply the row/col swaps to
|
||||||
// bring it to the location (k,k).
|
// bring it to the location (k,k).
|
||||||
|
|
||||||
m_rowsTranspositions.coeffRef(k) = row_of_biggest_in_corner;
|
m_rowsTranspositions.coeffRef(k) = internal::convert_index<StorageIndex>(row_of_biggest_in_corner);
|
||||||
m_colsTranspositions.coeffRef(k) = col_of_biggest_in_corner;
|
m_colsTranspositions.coeffRef(k) = internal::convert_index<StorageIndex>(col_of_biggest_in_corner);
|
||||||
if(k != row_of_biggest_in_corner) {
|
if(k != row_of_biggest_in_corner) {
|
||||||
m_lu.row(k).swap(m_lu.row(row_of_biggest_in_corner));
|
m_lu.row(k).swap(m_lu.row(row_of_biggest_in_corner));
|
||||||
++number_of_transpositions;
|
++number_of_transpositions;
|
||||||
|
@ -19,6 +19,7 @@ template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
|
|||||||
{
|
{
|
||||||
typedef MatrixXpr XprKind;
|
typedef MatrixXpr XprKind;
|
||||||
typedef SolverStorage StorageKind;
|
typedef SolverStorage StorageKind;
|
||||||
|
typedef int StorageIndex;
|
||||||
typedef traits<_MatrixType> BaseTraits;
|
typedef traits<_MatrixType> BaseTraits;
|
||||||
enum {
|
enum {
|
||||||
Flags = BaseTraits::Flags & RowMajorBit,
|
Flags = BaseTraits::Flags & RowMajorBit,
|
||||||
@ -80,7 +81,6 @@ template<typename _MatrixType> class PartialPivLU
|
|||||||
typedef _MatrixType MatrixType;
|
typedef _MatrixType MatrixType;
|
||||||
typedef SolverBase<PartialPivLU> Base;
|
typedef SolverBase<PartialPivLU> Base;
|
||||||
EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU)
|
EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU)
|
||||||
// FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
|
|
||||||
enum {
|
enum {
|
||||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
|
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
|
||||||
|
@ -612,6 +612,62 @@ template<typename Scalar, int Dim, int Options> void transform_products()
|
|||||||
VERIFY_IS_APPROX((ac*p).matrix(), a_m*p_m);
|
VERIFY_IS_APPROX((ac*p).matrix(), a_m*p_m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename Scalar, int Mode, int Options> void transformations_no_scale()
|
||||||
|
{
|
||||||
|
/* this test covers the following files:
|
||||||
|
Cross.h, Quaternion.h, Transform.h
|
||||||
|
*/
|
||||||
|
typedef Matrix<Scalar,3,1> Vector3;
|
||||||
|
typedef Matrix<Scalar,4,1> Vector4;
|
||||||
|
typedef Quaternion<Scalar> Quaternionx;
|
||||||
|
typedef AngleAxis<Scalar> AngleAxisx;
|
||||||
|
typedef Transform<Scalar,3,Mode,Options> Transform3;
|
||||||
|
typedef Translation<Scalar,3> Translation3;
|
||||||
|
typedef Matrix<Scalar,4,4> Matrix4;
|
||||||
|
|
||||||
|
Vector3 v0 = Vector3::Random(),
|
||||||
|
v1 = Vector3::Random();
|
||||||
|
|
||||||
|
Transform3 t0, t1, t2;
|
||||||
|
|
||||||
|
Scalar a = internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI));
|
||||||
|
|
||||||
|
Quaternionx q1, q2;
|
||||||
|
|
||||||
|
q1 = AngleAxisx(a, v0.normalized());
|
||||||
|
|
||||||
|
t0 = Transform3::Identity();
|
||||||
|
VERIFY_IS_APPROX(t0.matrix(), Transform3::MatrixType::Identity());
|
||||||
|
|
||||||
|
t0.setIdentity();
|
||||||
|
t1.setIdentity();
|
||||||
|
v1 = Vector3::Ones();
|
||||||
|
t0.linear() = q1.toRotationMatrix();
|
||||||
|
t0.pretranslate(v0);
|
||||||
|
t1.linear() = q1.conjugate().toRotationMatrix();
|
||||||
|
t1.translate(-v0);
|
||||||
|
|
||||||
|
VERIFY((t0 * t1).matrix().isIdentity(test_precision<Scalar>()));
|
||||||
|
|
||||||
|
t1.fromPositionOrientationScale(v0, q1, v1);
|
||||||
|
VERIFY_IS_APPROX(t1.matrix(), t0.matrix());
|
||||||
|
VERIFY_IS_APPROX(t1*v1, t0*v1);
|
||||||
|
|
||||||
|
// translation * vector
|
||||||
|
t0.setIdentity();
|
||||||
|
t0.translate(v0);
|
||||||
|
VERIFY_IS_APPROX((t0 * v1).template head<3>(), Translation3(v0) * v1);
|
||||||
|
|
||||||
|
// Conversion to matrix.
|
||||||
|
Transform3 t3;
|
||||||
|
t3.linear() = q1.toRotationMatrix();
|
||||||
|
t3.translation() = v1;
|
||||||
|
Matrix4 m3 = t3.matrix();
|
||||||
|
VERIFY((m3 * m3.inverse()).isIdentity(test_precision<Scalar>()));
|
||||||
|
// Verify implicit last row is initialized.
|
||||||
|
VERIFY_IS_APPROX(Vector4(m3.row(3)), Vector4(0.0, 0.0, 0.0, 1.0));
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_DECLARE_TEST(geo_transformations)
|
EIGEN_DECLARE_TEST(geo_transformations)
|
||||||
{
|
{
|
||||||
for(int i = 0; i < g_repeat; i++) {
|
for(int i = 0; i < g_repeat; i++) {
|
||||||
@ -625,7 +681,7 @@ EIGEN_DECLARE_TEST(geo_transformations)
|
|||||||
CALL_SUBTEST_3(( transformations<double,Projective,AutoAlign>() ));
|
CALL_SUBTEST_3(( transformations<double,Projective,AutoAlign>() ));
|
||||||
CALL_SUBTEST_3(( transformations<double,Projective,DontAlign>() ));
|
CALL_SUBTEST_3(( transformations<double,Projective,DontAlign>() ));
|
||||||
CALL_SUBTEST_3(( transform_alignment<double>() ));
|
CALL_SUBTEST_3(( transform_alignment<double>() ));
|
||||||
|
|
||||||
CALL_SUBTEST_4(( transformations<float,Affine,RowMajor|AutoAlign>() ));
|
CALL_SUBTEST_4(( transformations<float,Affine,RowMajor|AutoAlign>() ));
|
||||||
CALL_SUBTEST_4(( non_projective_only<float,Affine,RowMajor>() ));
|
CALL_SUBTEST_4(( non_projective_only<float,Affine,RowMajor>() ));
|
||||||
|
|
||||||
@ -641,5 +697,8 @@ EIGEN_DECLARE_TEST(geo_transformations)
|
|||||||
|
|
||||||
CALL_SUBTEST_8(( transform_associativity<double,2,ColMajor>(Rotation2D<double>(internal::random<double>()*double(EIGEN_PI))) ));
|
CALL_SUBTEST_8(( transform_associativity<double,2,ColMajor>(Rotation2D<double>(internal::random<double>()*double(EIGEN_PI))) ));
|
||||||
CALL_SUBTEST_8(( transform_associativity<double,3,ColMajor>(Quaterniond::UnitRandom()) ));
|
CALL_SUBTEST_8(( transform_associativity<double,3,ColMajor>(Quaterniond::UnitRandom()) ));
|
||||||
|
|
||||||
|
CALL_SUBTEST_9(( transformations_no_scale<double,Affine,AutoAlign>() ));
|
||||||
|
CALL_SUBTEST_9(( transformations_no_scale<double,Isometry,AutoAlign>() ));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,8 @@ typename MatrixType::RealScalar matrix_l1_norm(const MatrixType& m) {
|
|||||||
|
|
||||||
template<typename MatrixType> void lu_non_invertible()
|
template<typename MatrixType> void lu_non_invertible()
|
||||||
{
|
{
|
||||||
|
STATIC_CHECK(( internal::is_same<typename FullPivLU<MatrixType>::StorageIndex,int>::value ));
|
||||||
|
|
||||||
typedef typename MatrixType::RealScalar RealScalar;
|
typedef typename MatrixType::RealScalar RealScalar;
|
||||||
/* this test covers the following files:
|
/* this test covers the following files:
|
||||||
LU.h
|
LU.h
|
||||||
@ -191,6 +193,8 @@ template<typename MatrixType> void lu_partial_piv()
|
|||||||
m1.setRandom();
|
m1.setRandom();
|
||||||
PartialPivLU<MatrixType> plu(m1);
|
PartialPivLU<MatrixType> plu(m1);
|
||||||
|
|
||||||
|
STATIC_CHECK(( internal::is_same<typename PartialPivLU<MatrixType>::StorageIndex,int>::value ));
|
||||||
|
|
||||||
VERIFY_IS_APPROX(m1, plu.reconstructedMatrix());
|
VERIFY_IS_APPROX(m1, plu.reconstructedMatrix());
|
||||||
|
|
||||||
m3 = MatrixType::Random(size,size);
|
m3 = MatrixType::Random(size,size);
|
||||||
|
@ -568,6 +568,7 @@ template<typename Scalar,typename Packet> void packetmath_real()
|
|||||||
h.store(data2, internal::plog(h.load(data1)));
|
h.store(data2, internal::plog(h.load(data1)));
|
||||||
VERIFY((numext::isinf)(data2[0]));
|
VERIFY((numext::isinf)(data2[0]));
|
||||||
}
|
}
|
||||||
|
if(PacketTraits::HasSqrt)
|
||||||
{
|
{
|
||||||
packet_helper<PacketTraits::HasSqrt,Packet> h;
|
packet_helper<PacketTraits::HasSqrt,Packet> h;
|
||||||
data1[0] = Scalar(-1.0f);
|
data1[0] = Scalar(-1.0f);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user