mirror of https://gitlab.com/libeigen/eigen.git
commit 76236cdea4: merge
@@ -32,6 +32,7 @@
  * \endcode
  */

+#include "src/misc/RealSvd2x2.h"
 #include "src/Eigenvalues/Tridiagonalization.h"
 #include "src/Eigenvalues/RealSchur.h"
 #include "src/Eigenvalues/EigenSolver.h"
@@ -31,6 +31,7 @@
  * \endcode
  */

+#include "src/misc/RealSvd2x2.h"
 #include "src/SVD/UpperBidiagonalization.h"
 #include "src/SVD/SVDBase.h"
 #include "src/SVD/JacobiSVD.h"
@@ -149,7 +149,7 @@ class Array

 #if EIGEN_HAS_RVALUE_REFERENCES
     EIGEN_DEVICE_FUNC
-    Array(Array&& other)
+    Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
       : Base(std::move(other))
     {
       Base::_check_template_params();
@@ -157,7 +157,7 @@ class Array
       Base::_set_noalias(other);
     }
     EIGEN_DEVICE_FUNC
-    Array& operator=(Array&& other)
+    Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
     {
       other.swap(*this);
       return *this;
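For readers outside the Eigen codebase, a minimal standalone sketch of the conditional-noexcept pattern the hunk above applies. `MY_NOEXCEPT_IF` is a hypothetical stand-in for Eigen's `EIGEN_NOEXCEPT_IF` (which expands to nothing on pre-C++11 compilers):

```cpp
#include <type_traits>
#include <utility>

// Hypothetical stand-in for EIGEN_NOEXCEPT_IF.
#define MY_NOEXCEPT_IF(cond) noexcept(cond)

template <typename Scalar>
struct Buffer {
  Scalar value;
  // noexcept exactly when moving a Scalar cannot throw, so standard
  // containers can safely prefer the move path during reallocation.
  Buffer(Buffer&& other)
      MY_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
      : value(std::move(other.value)) {}
};

static_assert(std::is_nothrow_move_constructible<Buffer<int> >::value,
              "Buffer<int> moves must be noexcept");
```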
@@ -28,6 +28,8 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
     AlignedOnScalar = 1,
     size=2,
     HasHalfPacket = 0,
+    HasAdd    = 1,
+    HasMul    = 1,
     HasDiv    = 1,
     HasSqrt   = 1,
     HasRsqrt  = 1,
@@ -14,8 +14,9 @@ namespace Eigen {

 namespace internal {

-static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
-static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
+const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
+static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );
+static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );

 //---------- float ----------
 struct Packet2cf
@@ -274,7 +275,8 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
 //---------- double ----------
 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG

-static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000);
+const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
+static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );

 struct Packet1cd
 {
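Why the change: brace-initialization of NEON vector types is compiler-specific (hence the old `EIGEN_INIT_NEON_PACKET*` macros), while `vld1q_*` loads from a plain array work everywhere. A hedged standalone sketch of the new style, assuming an ARM NEON target:

```cpp
#include <arm_neon.h>

// Sign mask for the imaginary lanes of two packed complex floats.
const uint32_t kConjXorData[] = { 0x00000000u, 0x80000000u,
                                  0x00000000u, 0x80000000u };
static const uint32x4_t kConjXor = vld1q_u32(kConjXorData);

// Conjugate two complex floats at once by flipping the imaginary sign bits.
float32x4_t pconj_sketch(float32x4_t v) {
  return vreinterpretq_f32_u32(
      veorq_u32(vreinterpretq_u32_f32(v), kConjXor));
}
```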
@@ -49,17 +49,6 @@ typedef uint32x4_t Packet4ui;
 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
   const Packet4i p4i_##NAME = pset1<Packet4i>(X)

-#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG
-  //Special treatment for Apple's llvm-gcc, its NEON packet types are unions
-  #define EIGEN_INIT_NEON_PACKET2(X, Y)       {{X, Y}}
-  #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
-#else
-  //Default initializer for packets
-  #define EIGEN_INIT_NEON_PACKET2(X, Y)       {X, Y}
-  #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
-#endif
-
-
 // arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
 // which is available on LLVM and GCC (at least)
 #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
@@ -122,12 +111,14 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {

 template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
 {
-  Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
+  const float32_t f[] = {0, 1, 2, 3};
+  Packet4f countdown = vld1q_f32(f);
   return vaddq_f32(pset1<Packet4f>(a), countdown);
 }
 template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a)
 {
-  Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
+  const int32_t i[] = {0, 1, 2, 3};
+  Packet4i countdown = vld1q_s32(i);
   return vaddq_s32(pset1<Packet4i>(a), countdown);
 }

@@ -585,7 +576,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { r

 template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)
 {
-  Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1);
+  const double countdown_raw[] = {0.0,1.0};
+  const Packet2d countdown = vld1q_f64(countdown_raw);
   return vaddq_f64(pset1<Packet2d>(a), countdown);
 }
 template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }
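The `plset` hunks follow the same pattern: `plset(a)` must produce `{a, a+1, a+2, a+3}`, and the step vector is now loaded from an ordinary array instead of a brace-initialized packet. A standalone sketch under the same NEON assumption:

```cpp
#include <arm_neon.h>

// Equivalent of plset<Packet4f>(a): returns {a, a+1, a+2, a+3}.
float32x4_t plset4_sketch(float a) {
  const float steps[] = {0.0f, 1.0f, 2.0f, 3.0f};  // portable initializer
  return vaddq_f32(vdupq_n_f32(a), vld1q_f32(steps));
}
```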
@@ -328,6 +328,30 @@ struct result_of<Func(ArgType0,ArgType1)> {
   enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
   typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type;
 };

+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2, int SizeOf=sizeof(has_none)>
+struct ternary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_std_result_type)>
+{typedef typename Func::result_type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_tr1_result)>
+{typedef typename Func::template result<Func(ArgType0,ArgType1,ArgType2)>::type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct result_of<Func(ArgType0,ArgType1,ArgType2)> {
+  template<typename T>
+  static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+  template<typename T>
+  static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1,ArgType2)>::type const * = 0);
+  static has_none testFunctor(...);
+
+  // note that the following indirection is needed for gcc-3.3
+  enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+  typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type;
+};
 #endif

 /** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer.
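The new ternary `result_of` reuses the pre-C++11 "sizeof dispatch" trick: overloads returning types of distinct sizes are probed in an unevaluated context, and the winning overload's size selects a partial specialization. A minimal self-contained sketch of the mechanism (hypothetical names):

```cpp
struct has_result_type { char x[1]; };  // distinct sizes tag each outcome
struct has_none        { char x[2]; };

template <typename T>
has_result_type probe(T const*, typename T::result_type const* = 0);
has_none probe(...);

struct Plus  { typedef int result_type; };
struct Plain { };

// sizeof never evaluates the call; it only resolves the overload, so the
// functions need no definitions.
enum {
  plus_kind  = sizeof(probe(static_cast<Plus*>(0))),   // == sizeof(has_result_type)
  plain_kind = sizeof(probe(static_cast<Plain*>(0)))   // == sizeof(has_none)
};
```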
@@ -327,33 +327,22 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
       }
       else
       {
-        // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a triangular 2x2 block T
-        // From the eigen decomposition of T = U * E * U^-1,
-        // we can extract the eigenvalues of (U^-1 * S * U) / E
-        // Here, we can take advantage that E = diag(T), and U = [ 1 T_01 ; 0 T_11-T_00], and U^-1 = [1 -T_11/(T_11-T_00) ; 0 1/(T_11-T_00)].
-        // Then taking beta=T_00*T_11*(T_11-T_00), we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00) * (T_11-T_00):
-
-        // T = [a b ; 0 c]
-        // S = [e f ; g h]
-        RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i, i+1), c = m_realQZ.matrixT().coeff(i+1, i+1);
-        RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1);
-        RealScalar d = c-a;
-        RealScalar gb = g*b;
-        Matrix<RealScalar,2,2> A;
-        A << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a,
-             g*c       , (gb+h*d)*a;
-
-        // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal,
-        // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00):
-
-        Scalar p = Scalar(0.5) * (A.coeff(i, i) - A.coeff(i+1, i+1));
-        Scalar z = sqrt(abs(p * p + A.coeff(i+1, i) * A.coeff(i, i+1)));
-        m_alphas.coeffRef(i)   = ComplexScalar(A.coeff(i+1, i+1) + p, z);
-        m_alphas.coeffRef(i+1) = ComplexScalar(A.coeff(i+1, i+1) + p, -z);
+        // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a positive diagonal 2x2 block T
+        // Then taking beta=T_00*T_11, we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00):
+
+        // T =  [a 0]
+        //      [0 b]
+        RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i+1, i+1);
+        Matrix<RealScalar,2,2> S2 = m_matS.template block<2,2>(i,i) * Matrix<Scalar,2,1>(b,a).asDiagonal();
+
+        Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1));
+        Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1)));
+        m_alphas.coeffRef(i)   = ComplexScalar(S2.coeff(1,1) + p, z);
+        m_alphas.coeffRef(i+1) = ComplexScalar(S2.coeff(1,1) + p, -z);

         m_betas.coeffRef(i)   =
-        m_betas.coeffRef(i+1) = a*c*d;
+        m_betas.coeffRef(i+1) = a*b;

         i += 2;
       }
     }
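A sketch of the algebra behind the new code path (my reading of the hunk, not text from the commit): once the QZ post-processing below has made the 2x2 block of T diagonal, T = diag(a,b), and with S2 = S·diag(b,a) as in the code,

```latex
\det(S_2 - \mu I) \;=\; ab\,\det\!\Big(S - \tfrac{\mu}{ab}\,T\Big),
\qquad T = \operatorname{diag}(a,b)\ \ (a,b \neq 0),
```

so the generalized eigenvalues are \(\lambda = \mu/(ab)\): the code stores \(\alpha = \mu\) (an eigenvalue of S2) and \(\beta = ab\), with the complex pair obtained, in 0-based indices, from

```latex
\mu_{1,2} = (S_2)_{11} + p \;\pm\; i\sqrt{\big|\,p^2 + (S_2)_{10}(S_2)_{01}\,\big|},
\qquad p = \tfrac{1}{2}\big((S_2)_{00} - (S_2)_{11}\big),
```

where \((S_2)_{11} + p\) is just \(\operatorname{tr}(S_2)/2\).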
@@ -552,7 +552,6 @@ namespace Eigen {
           m_T.coeffRef(l,l-1) = Scalar(0.0);
       }

-
   template<typename MatrixType>
   RealQZ<MatrixType>& RealQZ<MatrixType>::compute(const MatrixType& A_in, const MatrixType& B_in, bool computeQZ)
   {
@@ -616,6 +615,37 @@ namespace Eigen {
     }
     // check if we converged before reaching iterations limit
     m_info = (local_iter<m_maxIters) ? Success : NoConvergence;
+
+    // For each non triangular 2x2 diagonal block of S,
+    //   reduce the respective 2x2 diagonal block of T to positive diagonal form using 2x2 SVD.
+    // This step is not mandatory for QZ, but it does help further extraction of eigenvalues/eigenvectors,
+    // and is on par with Lapack/Matlab QZ.
+    if(m_info==Success)
+    {
+      for(Index i=0; i<dim-1; ++i)
+      {
+        if(m_S.coeff(i+1, i) != Scalar(0))
+        {
+          JacobiRotation<Scalar> j_left, j_right;
+          internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right);
+
+          // Apply resulting Jacobi rotations
+          m_S.applyOnTheLeft(i,i+1,j_left);
+          m_S.applyOnTheRight(i,i+1,j_right);
+          m_T.applyOnTheLeft(i,i+1,j_left);
+          m_T.applyOnTheRight(i,i+1,j_right);
+          m_T(i+1,i) = m_T(i,i+1) = Scalar(0);
+
+          if(m_computeQZ) {
+            m_Q.applyOnTheRight(i,i+1,j_left.transpose());
+            m_Z.applyOnTheLeft(i,i+1,j_right.transpose());
+          }
+
+          i++;
+        }
+      }
+    }
+
     return *this;
   } // end compute
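A hedged usage sketch of the decomposition this hunk post-processes (standard RealQZ API; per the added comments, after this change the 2x2 diagonal blocks of T that correspond to complex eigenvalue pairs come out in positive diagonal form):

```cpp
#include <Eigen/Dense>
#include <Eigen/Eigenvalues>
#include <iostream>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(4, 4);

  // A = Q S Z and B = Q T Z, with S quasi-triangular and T triangular.
  Eigen::RealQZ<Eigen::MatrixXd> qz(A, B);
  if (qz.info() == Eigen::Success) {
    std::cout << "residual: "
              << (qz.matrixQ() * qz.matrixS() * qz.matrixZ() - A).norm()
              << "\n";
  }
}
```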
@@ -367,10 +367,10 @@ void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs)
     hCoeffs.tail(n-i-1).noalias() = (matA.bottomRightCorner(remainingSize,remainingSize).template selfadjointView<Lower>()
                                   * (conj(h) * matA.col(i).tail(remainingSize)));

-    hCoeffs.tail(n-i-1) += (conj(h)*Scalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1);
+    hCoeffs.tail(n-i-1) += (conj(h)*RealScalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1);

     matA.bottomRightCorner(remainingSize, remainingSize).template selfadjointView<Lower>()
-      .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), -1);
+      .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), Scalar(-1));

     matA.col(i).coeffRef(i+1) = beta;
     hCoeffs.coeffRef(i) = h;
@@ -1367,7 +1367,7 @@ struct transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is
     EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES);

     Matrix<typename ResultType::Scalar, Dim+1, 1> rhs;
-    rhs << other,1;
+    rhs.template head<Dim>() = other; rhs[Dim] = typename ResultType::Scalar(1);
     Matrix<typename ResultType::Scalar, WorkingRows, 1> res(T.matrix() * rhs);
     return res.template head<Dim>();
   }
@@ -183,7 +183,7 @@ class PardisoImpl : public SparseSolverBase<Derived>
     {
       if(m_isInitialized) // Factorization ran at least once
       {
-        internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, -1, m_size,0, 0, 0, m_perm.data(), 0,
+        internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, -1, internal::convert_index<StorageIndex>(m_size),0, 0, 0, m_perm.data(), 0,
                                                           m_iparm.data(), m_msglvl, NULL, NULL);
         m_isInitialized = false;
       }
@@ -194,11 +194,11 @@ class PardisoImpl : public SparseSolverBase<Derived>
       m_type = type;
       bool symmetric = std::abs(m_type) < 10;
       m_iparm[0] = 1;   // No solver default
-      m_iparm[1] = 3;   // use Metis for the ordering
-      m_iparm[2] = 1;   // Numbers of processors, value of OMP_NUM_THREADS
+      m_iparm[1] = 2;   // use Metis for the ordering
+      m_iparm[2] = 0;   // Reserved. Set to zero. (??Numbers of processors, value of OMP_NUM_THREADS??)
       m_iparm[3] = 0;   // No iterative-direct algorithm
       m_iparm[4] = 0;   // No user fill-in reducing permutation
-      m_iparm[5] = 0;   // Write solution into x
+      m_iparm[5] = 0;   // Write solution into x, b is left unchanged
      m_iparm[6] = 0;   // Not in use
       m_iparm[7] = 2;   // Max number of iterative refinement steps
       m_iparm[8] = 0;   // Not in use
@@ -219,7 +219,8 @@ class PardisoImpl : public SparseSolverBase<Derived>
       m_iparm[26] = 0;  // No matrix checker
       m_iparm[27] = (sizeof(RealScalar) == 4) ? 1 : 0;
       m_iparm[34] = 1;  // C indexing
-      m_iparm[59] = 1;  // Automatic switch between In-Core and Out-of-Core modes
+      m_iparm[36] = 0;  // CSR
+      m_iparm[59] = 0;  // 0 - In-Core ; 1 - Automatic switch between In-Core and Out-of-Core modes ; 2 - Out-of-Core

       memset(m_pt, 0, sizeof(m_pt));
     }
@@ -246,7 +247,7 @@ class PardisoImpl : public SparseSolverBase<Derived>
     mutable SparseMatrixType m_matrix;
     mutable ComputationInfo m_info;
     bool m_analysisIsOk, m_factorizationIsOk;
-    Index m_type, m_msglvl;
+    StorageIndex m_type, m_msglvl;
     mutable void *m_pt[64];
     mutable ParameterType m_iparm;
     mutable IntColVectorType m_perm;
@@ -265,10 +266,9 @@ Derived& PardisoImpl<Derived>::compute(const MatrixType& a)
   derived().getMatrix(a);

   Index error;
-  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 12, m_size,
+  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 12, internal::convert_index<StorageIndex>(m_size),
                                                             m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),
                                                             m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL);

   manageErrorCode(error);
   m_analysisIsOk = true;
   m_factorizationIsOk = true;
@@ -287,7 +287,7 @@ Derived& PardisoImpl<Derived>::analyzePattern(const MatrixType& a)
   derived().getMatrix(a);

   Index error;
-  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 11, m_size,
+  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 11, internal::convert_index<StorageIndex>(m_size),
                                                             m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),
                                                             m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL);

@@ -306,8 +306,8 @@ Derived& PardisoImpl<Derived>::factorize(const MatrixType& a)

   derived().getMatrix(a);

   Index error;
-  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 22, m_size,
+  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 22, internal::convert_index<StorageIndex>(m_size),
                                                             m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),
                                                             m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL);

@@ -354,9 +354,9 @@ void PardisoImpl<Derived>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase
   }

   Index error;
-  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 33, m_size,
+  error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 33, internal::convert_index<StorageIndex>(m_size),
                                                             m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),
-                                                            m_perm.data(), nrhs, m_iparm.data(), m_msglvl,
+                                                            m_perm.data(), internal::convert_index<StorageIndex>(nrhs), m_iparm.data(), m_msglvl,
                                                             rhs_ptr, x.derived().data());

   manageErrorCode(error);
@@ -371,6 +371,9 @@ void PardisoImpl<Derived>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase
  * using the Intel MKL PARDISO library. The sparse matrix A must be square and invertible.
  * The vectors or matrices X and B can be either dense or sparse.
  *
+ * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:
+ * \code solver.pardisoParameterArray()[59] = 1; \endcode
+ *
  * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  *
  * \implsparsesolverconcept
@@ -421,6 +424,9 @@ class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >
  * using the Intel MKL PARDISO library. The sparse matrix A must be selfadjoint and positive definite.
  * The vectors or matrices X and B can be either dense or sparse.
  *
+ * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:
+ * \code solver.pardisoParameterArray()[59] = 1; \endcode
+ *
  * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam UpLo can be any bitwise combination of Upper, Lower. The default is Upper, meaning only the upper triangular part has to be used.
  *         Upper|Lower can be used to tell both triangular parts can be used as input.
@@ -480,6 +486,9 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >
  * For complex matrices, A can also be symmetric only, see the \a Options template parameter.
  * The vectors or matrices X and B can be either dense or sparse.
  *
+ * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:
+ * \code solver.pardisoParameterArray()[59] = 1; \endcode
+ *
  * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
  * \tparam Options can be any bitwise combination of Upper, Lower, and Symmetric. The default is Upper, meaning only the upper triangular part has to be used.
  *         Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix.
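A usage sketch of the new out-of-core switch documented above (assumes Eigen is built against Intel MKL so that `<Eigen/PardisoSupport>` is usable; `solve_ooc` is a hypothetical helper):

```cpp
#include <Eigen/PardisoSupport>
#include <Eigen/Sparse>

using Eigen::SparseMatrix;
using Eigen::VectorXd;

VectorXd solve_ooc(const SparseMatrix<double>& A, const VectorXd& b) {
  Eigen::PardisoLU<SparseMatrix<double> > solver;
  // iparm(60) in PARDISO's Fortran numbering is index 59 here; 1 enables the
  // automatic in-core / out-of-core switch, 0 (the new default) stays in-core.
  solver.pardisoParameterArray()[59] = 1;
  solver.compute(A);
  return solver.info() == Eigen::Success ? VectorXd(solver.solve(b))
                                         : VectorXd();
}
```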
|
@ -419,38 +419,6 @@ struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename MatrixType, typename RealScalar, typename Index>
|
|
||||||
void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
|
|
||||||
JacobiRotation<RealScalar> *j_left,
|
|
||||||
JacobiRotation<RealScalar> *j_right)
|
|
||||||
{
|
|
||||||
using std::sqrt;
|
|
||||||
using std::abs;
|
|
||||||
Matrix<RealScalar,2,2> m;
|
|
||||||
m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)),
|
|
||||||
numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q));
|
|
||||||
JacobiRotation<RealScalar> rot1;
|
|
||||||
RealScalar t = m.coeff(0,0) + m.coeff(1,1);
|
|
||||||
RealScalar d = m.coeff(1,0) - m.coeff(0,1);
|
|
||||||
if(d == RealScalar(0))
|
|
||||||
{
|
|
||||||
rot1.s() = RealScalar(0);
|
|
||||||
rot1.c() = RealScalar(1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// If d!=0, then t/d cannot overflow because the magnitude of the
|
|
||||||
// entries forming d are not too small compared to the ones forming t.
|
|
||||||
RealScalar u = t / d;
|
|
||||||
RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u));
|
|
||||||
rot1.s() = RealScalar(1) / tmp;
|
|
||||||
rot1.c() = u / tmp;
|
|
||||||
}
|
|
||||||
m.applyOnTheLeft(0,1,rot1);
|
|
||||||
j_right->makeJacobi(m,0,1);
|
|
||||||
*j_left = rot1 * j_right->transpose();
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename _MatrixType, int QRPreconditioner>
|
template<typename _MatrixType, int QRPreconditioner>
|
||||||
struct traits<JacobiSVD<_MatrixType,QRPreconditioner> >
|
struct traits<JacobiSVD<_MatrixType,QRPreconditioner> >
|
||||||
{
|
{
|
||||||
|
Eigen/src/misc/RealSvd2x2.h (new file, 54 lines)
@@ -0,0 +1,54 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2013-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REALSVD2X2_H
+#define EIGEN_REALSVD2X2_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename MatrixType, typename RealScalar, typename Index>
+void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
+                         JacobiRotation<RealScalar> *j_left,
+                         JacobiRotation<RealScalar> *j_right)
+{
+  using std::sqrt;
+  using std::abs;
+  Matrix<RealScalar,2,2> m;
+  m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)),
+       numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q));
+  JacobiRotation<RealScalar> rot1;
+  RealScalar t = m.coeff(0,0) + m.coeff(1,1);
+  RealScalar d = m.coeff(1,0) - m.coeff(0,1);
+  if(d == RealScalar(0))
+  {
+    rot1.s() = RealScalar(0);
+    rot1.c() = RealScalar(1);
+  }
+  else
+  {
+    // If d!=0, then t/d cannot overflow because the magnitude of the
+    // entries forming d are not too small compared to the ones forming t.
+    RealScalar u = t / d;
+    RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u));
+    rot1.s() = RealScalar(1) / tmp;
+    rot1.c() = u / tmp;
+  }
+  m.applyOnTheLeft(0,1,rot1);
+  j_right->makeJacobi(m,0,1);
+  *j_left = rot1 * j_right->transpose();
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_REALSVD2X2_H
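How the new helper is consumed (this mirrors the RealQZ hunk earlier in the commit): it returns a pair of Jacobi rotations that diagonalize a real 2x2 block. Note it is an `internal::` API, so the sketch below relies on an implementation detail:

```cpp
#include <Eigen/Dense>

void diagonalize_2x2(Eigen::Matrix2d& m) {
  Eigen::JacobiRotation<double> j_left, j_right;
  // Rotations such that j_left * m * j_right is diagonal (the SVD of m
  // up to signs); p=0, q=1 address the block's rows/columns.
  Eigen::internal::real_2x2_jacobi_svd(m, 0, 1, &j_left, &j_right);
  m.applyOnTheLeft(0, 1, j_left);
  m.applyOnTheRight(0, 1, j_right);
}
```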
@@ -247,6 +247,7 @@ tan() const
  *
  * \sa tan(), asin(), acos()
  */
+EIGEN_DEVICE_FUNC
 inline const AtanReturnType
 atan() const
 {
@@ -288,6 +289,7 @@ asin() const
  *
  * \sa tan(), sinh(), cosh()
  */
+EIGEN_DEVICE_FUNC
 inline const TanhReturnType
 tanh() const
 {
@@ -301,6 +303,7 @@ tanh() const
  *
  * \sa sin(), tanh(), cosh()
  */
+EIGEN_DEVICE_FUNC
 inline const SinhReturnType
 sinh() const
 {
@@ -314,6 +317,7 @@ sinh() const
  *
  * \sa tan(), sinh(), cosh()
  */
+EIGEN_DEVICE_FUNC
 inline const CoshReturnType
 cosh() const
 {
@@ -331,6 +335,7 @@ cosh() const
  *
  * \sa digamma()
  */
+EIGEN_DEVICE_FUNC
 inline const LgammaReturnType
 lgamma() const
 {
@@ -345,6 +350,7 @@ lgamma() const
  *
  * \sa Eigen::digamma(), Eigen::polygamma(), lgamma()
  */
+EIGEN_DEVICE_FUNC
 inline const DigammaReturnType
 digamma() const
 {
@@ -363,6 +369,7 @@ digamma() const
  *
  * \sa erfc()
  */
+EIGEN_DEVICE_FUNC
 inline const ErfReturnType
 erf() const
 {
@@ -381,6 +388,7 @@ erf() const
  *
  * \sa erf()
  */
+EIGEN_DEVICE_FUNC
 inline const ErfcReturnType
 erfc() const
 {
@@ -436,6 +444,7 @@ cube() const
  *
  * \sa ceil(), floor()
  */
+EIGEN_DEVICE_FUNC
 inline const RoundReturnType
 round() const
 {
@@ -449,6 +458,7 @@ round() const
  *
  * \sa ceil(), round()
  */
+EIGEN_DEVICE_FUNC
 inline const FloorReturnType
 floor() const
 {
@@ -462,6 +472,7 @@ floor() const
  *
  * \sa floor(), round()
  */
+EIGEN_DEVICE_FUNC
 inline const CeilReturnType
 ceil() const
 {
@@ -475,6 +486,7 @@ ceil() const
  *
  * \sa isfinite(), isinf()
  */
+EIGEN_DEVICE_FUNC
 inline const IsNaNReturnType
 isNaN() const
 {
@@ -488,6 +500,7 @@ isNaN() const
  *
  * \sa isnan(), isfinite()
  */
+EIGEN_DEVICE_FUNC
 inline const IsInfReturnType
 isInf() const
 {
@@ -501,6 +514,7 @@ isInf() const
  *
  * \sa isnan(), isinf()
  */
+EIGEN_DEVICE_FUNC
 inline const IsFiniteReturnType
 isFinite() const
 {
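All fourteen hunks above do the same thing: annotate array methods so they can be called from CUDA device code. A hedged sketch of what such an annotation boils down to (`MY_DEVICE_FUNC` is a hypothetical stand-in, not Eigen's exact definition):

```cpp
#if defined(__CUDACC__)
  // Under NVCC, mark the function callable from both host and device.
  #define MY_DEVICE_FUNC __host__ __device__
#else
  #define MY_DEVICE_FUNC
#endif

MY_DEVICE_FUNC inline float squared(float x) { return x * x; }
```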
@@ -44,4 +44,8 @@ before-evaluators
 7013:f875e75f07e5  # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5)
 7591:09a8e2186610  # 3.3-alpha1
 7650:b0f3c8f43025  # help clang inlining
+8744:74b789ada92a  # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs)
+8789:efcb912e4356  # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes
+8972:81d53c711775  # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path
+8985:d935df21a082  # Remove the rotating kernel.
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2012-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -10,6 +10,7 @@
 #include "main.h"
 #include <limits>
 #include <Eigen/Eigenvalues>
+#include <Eigen/LU>

 template<typename MatrixType> void generalized_eigensolver_real(const MatrixType& m)
 {
@@ -21,6 +22,7 @@ template<typename MatrixType> void generalized_eigensolver_real(const MatrixType
   Index cols = m.cols();

   typedef typename MatrixType::Scalar Scalar;
+  typedef std::complex<Scalar> ComplexScalar;
   typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;

   MatrixType a = MatrixType::Random(rows,cols);
@@ -31,14 +33,28 @@ template<typename MatrixType> void generalized_eigensolver_real(const MatrixType
   MatrixType spdB = b.adjoint() * b + b1.adjoint() * b1;

   // lets compare to GeneralizedSelfAdjointEigenSolver
-  GeneralizedSelfAdjointEigenSolver<MatrixType> symmEig(spdA, spdB);
-  GeneralizedEigenSolver<MatrixType> eig(spdA, spdB);
+  {
+    GeneralizedSelfAdjointEigenSolver<MatrixType> symmEig(spdA, spdB);
+    GeneralizedEigenSolver<MatrixType> eig(spdA, spdB);

     VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0);

     VectorType realEigenvalues = eig.eigenvalues().real();
     std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size());
     VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues());
+  }
+
+  // non symmetric case:
+  {
+    GeneralizedEigenSolver<MatrixType> eig(a,b);
+    for(Index k=0; k<cols; ++k)
+    {
+      Matrix<ComplexScalar,Dynamic,Dynamic> tmp = (eig.betas()(k)*a).template cast<ComplexScalar>() - eig.alphas()(k)*b;
+      if(tmp.norm()>(std::numeric_limits<Scalar>::min)())
+        tmp /= tmp.norm();
+      VERIFY_IS_MUCH_SMALLER_THAN( std::abs(tmp.determinant()), Scalar(1) );
+    }
+  }

   // regression test for bug 1098
   {
|
|||||||
s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);
|
s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);
|
||||||
CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(s,s)) );
|
CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(s,s)) );
|
||||||
|
|
||||||
// some trivial but implementation-wise tricky cases
|
// some trivial but implementation-wise special cases
|
||||||
CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(1,1)) );
|
CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(1,1)) );
|
||||||
CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(2,2)) );
|
CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(2,2)) );
|
||||||
CALL_SUBTEST_3( generalized_eigensolver_real(Matrix<double,1,1>()) );
|
CALL_SUBTEST_3( generalized_eigensolver_real(Matrix<double,1,1>()) );
|
||||||
|
@ -58,6 +58,8 @@ template<typename Scalar,int Size> void homogeneous(void)
|
|||||||
T2MatrixType t2 = T2MatrixType::Random();
|
T2MatrixType t2 = T2MatrixType::Random();
|
||||||
VERIFY_IS_APPROX(t2 * (v0.homogeneous().eval()), t2 * v0.homogeneous());
|
VERIFY_IS_APPROX(t2 * (v0.homogeneous().eval()), t2 * v0.homogeneous());
|
||||||
VERIFY_IS_APPROX(t2 * (m0.colwise().homogeneous().eval()), t2 * m0.colwise().homogeneous());
|
VERIFY_IS_APPROX(t2 * (m0.colwise().homogeneous().eval()), t2 * m0.colwise().homogeneous());
|
||||||
|
VERIFY_IS_APPROX(t2 * (v0.homogeneous().asDiagonal()), t2 * hv0.asDiagonal());
|
||||||
|
VERIFY_IS_APPROX((v0.homogeneous().asDiagonal()) * t2, hv0.asDiagonal() * t2);
|
||||||
|
|
||||||
VERIFY_IS_APPROX((v0.transpose().rowwise().homogeneous().eval()) * t2,
|
VERIFY_IS_APPROX((v0.transpose().rowwise().homogeneous().eval()) * t2,
|
||||||
v0.transpose().rowwise().homogeneous() * t2);
|
v0.transpose().rowwise().homogeneous() * t2);
|
||||||
|
@ -49,11 +49,20 @@ template<typename MatrixType> void real_qz(const MatrixType& m)
|
|||||||
for (Index i=0; i<A.cols(); i++)
|
for (Index i=0; i<A.cols(); i++)
|
||||||
for (Index j=0; j<i; j++) {
|
for (Index j=0; j<i; j++) {
|
||||||
if (abs(qz.matrixT()(i,j))!=Scalar(0.0))
|
if (abs(qz.matrixT()(i,j))!=Scalar(0.0))
|
||||||
|
{
|
||||||
|
std::cerr << "Error: T(" << i << "," << j << ") = " << qz.matrixT()(i,j) << std::endl;
|
||||||
all_zeros = false;
|
all_zeros = false;
|
||||||
|
}
|
||||||
if (j<i-1 && abs(qz.matrixS()(i,j))!=Scalar(0.0))
|
if (j<i-1 && abs(qz.matrixS()(i,j))!=Scalar(0.0))
|
||||||
|
{
|
||||||
|
std::cerr << "Error: S(" << i << "," << j << ") = " << qz.matrixS()(i,j) << std::endl;
|
||||||
all_zeros = false;
|
all_zeros = false;
|
||||||
|
}
|
||||||
if (j==i-1 && j>0 && abs(qz.matrixS()(i,j))!=Scalar(0.0) && abs(qz.matrixS()(i-1,j-1))!=Scalar(0.0))
|
if (j==i-1 && j>0 && abs(qz.matrixS()(i,j))!=Scalar(0.0) && abs(qz.matrixS()(i-1,j-1))!=Scalar(0.0))
|
||||||
|
{
|
||||||
|
std::cerr << "Error: S(" << i << "," << j << ") = " << qz.matrixS()(i,j) << " && S(" << i-1 << "," << j-1 << ") = " << qz.matrixS()(i-1,j-1) << std::endl;
|
||||||
all_zeros = false;
|
all_zeros = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
VERIFY_IS_EQUAL(all_zeros, true);
|
VERIFY_IS_EQUAL(all_zeros, true);
|
||||||
VERIFY_IS_APPROX(qz.matrixQ()*qz.matrixS()*qz.matrixZ(), A);
|
VERIFY_IS_APPROX(qz.matrixQ()*qz.matrixS()*qz.matrixZ(), A);
|
||||||
|
@ -202,7 +202,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
|||||||
// across k dimension.
|
// across k dimension.
|
||||||
const TensorOpCost cost =
|
const TensorOpCost cost =
|
||||||
contractionCost(m, n, bm, bn, bk, shard_by_col, false);
|
contractionCost(m, n, bm, bn, bk, shard_by_col, false);
|
||||||
Index num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
|
int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
|
||||||
static_cast<double>(n) * m, cost, this->m_device.numThreads());
|
static_cast<double>(n) * m, cost, this->m_device.numThreads());
|
||||||
|
|
||||||
// TODO(dvyukov): this is a stop-gap to prevent regressions while the cost
|
// TODO(dvyukov): this is a stop-gap to prevent regressions while the cost
|
||||||
@ -301,7 +301,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
|||||||
class Context {
|
class Context {
|
||||||
public:
|
public:
|
||||||
Context(const Device& device, int num_threads, LhsMapper& lhs,
|
Context(const Device& device, int num_threads, LhsMapper& lhs,
|
||||||
RhsMapper& rhs, Scalar* buffer, Index m, Index n, Index k, Index bm,
|
RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm,
|
||||||
Index bn, Index bk, Index nm, Index nn, Index nk, Index gm,
|
Index bn, Index bk, Index nm, Index nn, Index nk, Index gm,
|
||||||
Index gn, Index nm0, Index nn0, bool shard_by_col,
|
Index gn, Index nm0, Index nn0, bool shard_by_col,
|
||||||
bool parallel_pack)
|
bool parallel_pack)
|
||||||
@ -309,13 +309,13 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
|||||||
lhs_(lhs),
|
lhs_(lhs),
|
||||||
rhs_(rhs),
|
rhs_(rhs),
|
||||||
buffer_(buffer),
|
buffer_(buffer),
|
||||||
output_(buffer, m),
|
output_(buffer, tm),
|
||||||
num_threads_(num_threads),
|
num_threads_(num_threads),
|
||||||
shard_by_col_(shard_by_col),
|
shard_by_col_(shard_by_col),
|
||||||
parallel_pack_(parallel_pack),
|
parallel_pack_(parallel_pack),
|
||||||
m_(m),
|
m_(tm),
|
||||||
n_(n),
|
n_(tn),
|
||||||
k_(k),
|
k_(tk),
|
||||||
bm_(bm),
|
bm_(bm),
|
||||||
bn_(bn),
|
bn_(bn),
|
||||||
bk_(bk),
|
bk_(bk),
|
||||||
|
@ -106,7 +106,7 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
|
|||||||
// Build a thread pool device on top the an existing pool of threads.
|
// Build a thread pool device on top the an existing pool of threads.
|
||||||
struct ThreadPoolDevice {
|
struct ThreadPoolDevice {
|
||||||
// The ownership of the thread pool remains with the caller.
|
// The ownership of the thread pool remains with the caller.
|
||||||
ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { }
|
ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { }
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
||||||
return internal::aligned_malloc(num_bytes);
|
return internal::aligned_malloc(num_bytes);
|
||||||
@ -130,7 +130,7 @@ struct ThreadPoolDevice {
|
|||||||
::memset(buffer, c, n);
|
::memset(buffer, c, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE size_t numThreads() const {
|
EIGEN_STRONG_INLINE int numThreads() const {
|
||||||
return num_threads_;
|
return num_threads_;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -182,7 +182,7 @@ struct ThreadPoolDevice {
|
|||||||
std::function<void(Index, Index)> f) const {
|
std::function<void(Index, Index)> f) const {
|
||||||
typedef TensorCostModel<ThreadPoolDevice> CostModel;
|
typedef TensorCostModel<ThreadPoolDevice> CostModel;
|
||||||
if (n <= 1 || numThreads() == 1 ||
|
if (n <= 1 || numThreads() == 1 ||
|
||||||
CostModel::numThreads(n, cost, numThreads()) == 1) {
|
CostModel::numThreads(n, cost, static_cast<int>(numThreads())) == 1) {
|
||||||
f(0, n);
|
f(0, n);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -242,7 +242,7 @@ struct ThreadPoolDevice {
|
|||||||
// Recursively divide size into halves until we reach block_size.
|
// Recursively divide size into halves until we reach block_size.
|
||||||
// Division code rounds mid to block_size, so we are guaranteed to get
|
// Division code rounds mid to block_size, so we are guaranteed to get
|
||||||
// block_count leaves that do actual computations.
|
// block_count leaves that do actual computations.
|
||||||
Barrier barrier(block_count);
|
Barrier barrier(static_cast<unsigned int>(block_count));
|
||||||
std::function<void(Index, Index)> handleRange;
|
std::function<void(Index, Index)> handleRange;
|
||||||
handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) {
|
handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) {
|
||||||
if (last - first <= block_size) {
|
if (last - first <= block_size) {
|
||||||
@ -268,7 +268,7 @@ struct ThreadPoolDevice {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
ThreadPoolInterface* pool_;
|
ThreadPoolInterface* pool_;
|
||||||
size_t num_threads_;
|
int num_threads_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
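Context for the `Barrier`/`handleRange` hunks above: `parallelFor` splits [0, n) recursively in halves until ranges reach `block_size`, then runs the leaves on the pool and waits on the barrier. A standalone sketch of the splitting logic, using `std::thread` instead of Eigen's pool:

```cpp
#include <functional>
#include <thread>

void handle_range(long first, long last, long block_size,
                  const std::function<void(long, long)>& f) {
  if (last - first <= block_size) {
    f(first, last);            // leaf: do the actual work
    return;
  }
  const long mid = first + (last - first) / 2;
  // Hand one half to another thread, recurse on this one, then join.
  std::thread left(handle_range, first, mid, block_size, std::cref(f));
  handle_range(mid, last, block_size, f);
  left.join();
}
```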
@@ -426,6 +426,20 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
       m_arg3Impl(op.arg3Expression(), device)
   {
     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
+                         typename internal::traits<Arg2Type>::StorageKind>::value),
+                        STORAGE_KIND_MUST_MATCH)
+    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
+                         typename internal::traits<Arg3Type>::StorageKind>::value),
+                        STORAGE_KIND_MUST_MATCH)
+    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
+                         typename internal::traits<Arg2Type>::Index>::value),
+                        STORAGE_INDEX_MUST_MATCH)
+    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
+                         typename internal::traits<Arg3Type>::Index>::value),
+                        STORAGE_INDEX_MUST_MATCH)
+
     eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
   }

@@ -227,22 +227,6 @@ struct traits<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprT
                                      TernaryOp(typename Arg1XprType::Scalar,
                                                typename Arg2XprType::Scalar,
                                                typename Arg3XprType::Scalar)>::type Scalar;
-  EIGEN_STATIC_ASSERT(
-      (internal::is_same<typename traits<Arg1XprType>::StorageKind,
-                         typename traits<Arg2XprType>::StorageKind>::value),
-      STORAGE_KIND_MUST_MATCH)
-  EIGEN_STATIC_ASSERT(
-      (internal::is_same<typename traits<Arg1XprType>::StorageKind,
-                         typename traits<Arg3XprType>::StorageKind>::value),
-      STORAGE_KIND_MUST_MATCH)
-  EIGEN_STATIC_ASSERT(
-      (internal::is_same<typename traits<Arg1XprType>::Index,
-                         typename traits<Arg2XprType>::Index>::value),
-      STORAGE_INDEX_MUST_MATCH)
-  EIGEN_STATIC_ASSERT(
-      (internal::is_same<typename traits<Arg1XprType>::Index,
-                         typename traits<Arg3XprType>::Index>::value),
-      STORAGE_INDEX_MUST_MATCH)
   typedef traits<Arg1XprType> XprTraits;
   typedef typename traits<Arg1XprType>::StorageKind StorageKind;
   typedef typename traits<Arg1XprType>::Index Index;
@@ -84,6 +84,14 @@ struct functor_traits<scalar_sigmoid_op<T> > {
 };


+template<typename Reducer, typename Device>
+struct reducer_traits {
+  enum {
+    Cost = 1,
+    PacketAccess = false
+  };
+};
+
 // Standard reduction functors
 template <typename T> struct SumReducer
 {
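The shape of the refactoring above and in the hunks that follow: a primary traits template with conservative defaults, then one specialization per reducer, so cost and vectorizability move out of the reducer classes themselves. A minimal sketch with hypothetical names:

```cpp
// Primary template: safe defaults for unknown reducers.
template <typename Reducer, typename Device>
struct reducer_traits {
  enum { Cost = 1, PacketAccess = false };
};

struct MySum {};     // hypothetical reducer
struct MyDevice {};  // hypothetical device

// Opt a specific reducer into vectorization on every device.
template <typename Device>
struct reducer_traits<MySum, Device> {
  enum { Cost = 2, PacketAccess = true };
};

static_assert(reducer_traits<MySum, MyDevice>::PacketAccess == 1,
              "MySum should advertise packet access");
```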
@@ -119,6 +127,15 @@ template <typename T> struct SumReducer
   }
 };

+template <typename T, typename Device>
+struct reducer_traits<SumReducer<T>, Device> {
+  enum {
+    Cost = NumTraits<T>::AddCost,
+    PacketAccess = PacketType<T, Device>::HasAdd
+  };
+};
+
+
 template <typename T> struct MeanReducer
 {
   static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger;
@@ -162,6 +179,15 @@ template <typename T> struct MeanReducer
     DenseIndex packetCount_;
 };

+template <typename T, typename Device>
+struct reducer_traits<MeanReducer<T>, Device> {
+  enum {
+    Cost = NumTraits<T>::AddCost,
+    PacketAccess = PacketType<T, Device>::HasAdd
+  };
+};
+
+
 template <typename T> struct MaxReducer
 {
   static const bool PacketAccess = packet_traits<T>::HasMax;
@@ -195,6 +221,15 @@ template <typename T> struct MaxReducer
   }
 };

+template <typename T, typename Device>
+struct reducer_traits<MaxReducer<T>, Device> {
+  enum {
+    Cost = NumTraits<T>::AddCost,
+    PacketAccess = PacketType<T, Device>::HasMax
+  };
+};
+
+
 template <typename T> struct MinReducer
 {
   static const bool PacketAccess = packet_traits<T>::HasMin;
@@ -228,6 +263,14 @@ template <typename T> struct MinReducer
   }
 };

+template <typename T, typename Device>
+struct reducer_traits<MinReducer<T>, Device> {
+  enum {
+    Cost = NumTraits<T>::AddCost,
+    PacketAccess = PacketType<T, Device>::HasMin
+  };
+};
+
+
 template <typename T> struct ProdReducer
 {
@@ -263,6 +306,14 @@ template <typename T> struct ProdReducer
   }
 };

+template <typename T, typename Device>
+struct reducer_traits<ProdReducer<T>, Device> {
+  enum {
+    Cost = NumTraits<T>::MulCost,
+    PacketAccess = PacketType<T, Device>::HasMul
+  };
+};
+
+
 struct AndReducer
 {
@@ -280,6 +331,15 @@ struct AndReducer
   }
 };

+template <typename Device>
+struct reducer_traits<AndReducer, Device> {
+  enum {
+    Cost = 1,
+    PacketAccess = false
+  };
+};
+
+
 struct OrReducer {
   static const bool PacketAccess = false;
   static const bool IsStateful = false;
@@ -295,6 +355,15 @@ struct OrReducer {
   }
 };

+template <typename Device>
+struct reducer_traits<OrReducer, Device> {
+  enum {
+    Cost = 1,
+    PacketAccess = false
+  };
+};
+
+
 // Argmin/Argmax reducers
 template <typename T> struct ArgMaxTupleReducer
 {
@@ -312,6 +381,15 @@ template <typename T> struct ArgMaxTupleReducer
   }
 };

+template <typename T, typename Device>
+struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
+  enum {
+    Cost = NumTraits<T>::AddCost,
+    PacketAccess = false
+  };
+};
+
+
 template <typename T> struct ArgMinTupleReducer
 {
   static const bool PacketAccess = false;
@@ -328,6 +406,14 @@ template <typename T> struct ArgMinTupleReducer
   }
 };

+template <typename T, typename Device>
+struct reducer_traits<ArgMinTupleReducer<T>, Device> {
+  enum {
+    Cost = NumTraits<T>::AddCost,
+    PacketAccess = false
+  };
+};
+
+
 // Random number generation
 namespace {
@@ -47,22 +47,39 @@ template <> struct max_n_1<0> {
 
 // Default packet types
 template <typename Scalar, typename Device>
-struct PacketType {
+struct PacketType : internal::packet_traits<Scalar> {
   typedef typename internal::packet_traits<Scalar>::type type;
-  enum { size = internal::unpacket_traits<type>::size };
 };
 
 // For CUDA packet types when using a GpuDevice
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16)
 template <>
-struct PacketType<float, GpuDevice> {
-  typedef float4 type;
-  static const int size = 4;
-};
-template <>
-struct PacketType<double, GpuDevice> {
-  typedef double2 type;
+struct PacketType<half, GpuDevice> {
+  typedef half2 type;
   static const int size = 2;
+  enum {
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasNegate = 1,
+    HasAbs    = 1,
+    HasArg    = 0,
+    HasAbs2   = 0,
+    HasMin    = 1,
+    HasMax    = 1,
+    HasConj   = 0,
+    HasSetLinear = 0,
+    HasBlend  = 0,
+
+    HasDiv    = 1,
+    HasSqrt   = 1,
+    HasRsqrt  = 1,
+    HasExp    = 1,
+    HasLog    = 1,
+    HasLog1p  = 0,
+    HasLog10  = 0,
+    HasPow    = 1,
+  };
 };
 #endif
 
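
Deriving PacketType from internal::packet_traits<Scalar> means the generic case now inherits the packet size and all the Has* capability flags from one place, while the half/GpuDevice specialization spells its flags out because half2 support does not match the generic half traits. A self-contained sketch of inheriting capability flags and overriding them in a specialization (illustrative names, not Eigen's):

    // Base traits carry the capability flags; the derived template simply
    // inherits them, and a specialization can state its own.
    template <typename Scalar>
    struct traits_base {
      typedef Scalar type;
      enum { size = 1, HasSqrt = 0 };
    };

    template <>
    struct traits_base<float> {
      typedef float type;             // stands in for a SIMD packet type
      enum { size = 4, HasSqrt = 1 };
    };

    struct SomeDevice {};

    template <typename Scalar, typename Device>
    struct packet_type_sketch : traits_base<Scalar> {
      typedef typename traits_base<Scalar>::type type;
    };

    static_assert(packet_type_sketch<float, SomeDevice>::size == 4,
                  "flags come along from the base traits");
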
@@ -148,7 +148,7 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }
 
-  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
 
  protected:
   TensorEvaluator<ArgType, Device> m_impl;
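
Adding EIGEN_DEVICE_FUNC to impl() matters under nvcc, where the macro expands to __host__ __device__: a __device__ caller may only call functions that are themselves device-annotated, so an unannotated accessor breaks every kernel path that touches it. A minimal standalone CUDA illustration of the rule (not Eigen code):

    // A member called from device code must carry a device annotation.
    #define DEVICE_FUNC __host__ __device__

    struct Wrapper {
      int value;
      DEVICE_FUNC const int& impl() const { return value; }  // usable in kernels
    };

    __global__ void read_kernel(Wrapper w, int* out) {
      *out = w.impl();  // compiles only because impl() is __host__ __device__
    }
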
@@ -130,15 +130,17 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
       if (block == 0) {
         // We're the first block to run, initialize the output value
         atomicExch(output, reducer.initialize());
-        unsigned int old = atomicExch(semaphore, 2u);
-        assert(old == 1u);
+        __threadfence();
+        atomicExch(semaphore, 2u);
       }
       else {
+        // Wait for the first block to initialize the output value.
         // Use atomicCAS here to ensure that the reads aren't cached
-        unsigned int val = atomicCAS(semaphore, 2u, 2u);
-        while (val < 2u) {
+        unsigned int val;
+        do {
           val = atomicCAS(semaphore, 2u, 2u);
         }
+        while (val < 2u);
       }
     }
   }
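
The rewritten handshake fixes an ordering bug: the old code asserted on the semaphore's previous value but never guaranteed that the store to output was visible to other blocks before the semaphore flipped to 2. The new sequence publishes the value, issues __threadfence() so the write is visible device-wide, and only then releases the semaphore; waiting blocks spin on atomicCAS, which reads the current value atomically rather than through a cache. A condensed standalone sketch of the same release/acquire handshake (illustrative kernel, not the Eigen one):

    // One block publishes a value, fences, then releases; others spin-wait.
    __global__ void handshake_sketch(float* output, unsigned int* semaphore) {
      if (blockIdx.x == 0) {
        if (threadIdx.x == 0) {
          *output = 0.0f;             // publish the initial value...
          __threadfence();            // ...make it visible device-wide...
          atomicExch(semaphore, 2u);  // ...then release the other blocks
        }
      } else if (threadIdx.x == 0) {
        unsigned int val;
        do {
          val = atomicCAS(semaphore, 2u, 2u);  // atomic read, never cached
        } while (val < 2u);
        // *output is now safe to read or accumulate into.
      }
    }
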
@@ -166,12 +168,8 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
   }
 
   if (gridDim.x > 1 && threadIdx.x == 0) {
-    unsigned int ticket = atomicInc(semaphore, UINT_MAX);
-    assert(ticket >= 2u);
-    if (ticket == gridDim.x + 1) {
-      // We're the last block, reset the semaphore
-      *semaphore = 0;
-    }
+    // Let the last block reset the semaphore
+    atomicInc(semaphore, gridDim.x + 1);
   }
 }
 
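
The reset path shrinks because of atomicInc's wrap-around contract: atomicInc(addr, bound) stores old >= bound ? 0 : old + 1 and returns old. With bound = gridDim.x + 1, the last finishing block pushes the counter past the bound and it wraps to 0, resetting the semaphore without the old ticket test and plain store. A worked count, assuming the semaphore sits at 2 after initialization and gridDim.x = 4:

    // bound = gridDim.x + 1 = 5; one increment per block (threadIdx.x == 0)
    // a block finishes: old = 2 -> stores 3
    // a block finishes: old = 3 -> stores 4
    // a block finishes: old = 4 -> stores 5
    // last block:       old = 5 -> 5 >= 5, stores 0  (semaphore reset for reuse)
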
@@ -330,10 +328,10 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
   // Unfortunately nvidia doesn't support well exotic types such as complex,
   // so reduce the scope of the optimized version of the code to the simple case
   // of floats and half floats.
 #ifdef EIGEN_HAS_CUDA_FP16
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       (internal::is_same<typename Self::CoeffReturnType, float>::value ||
-      (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
+      (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
 #else
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                  internal::is_same<typename Self::CoeffReturnType, float>::value;
@@ -348,7 +346,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
       return;
     }
 
-    FullReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs);
+    FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
   }
 };
 
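
Switching from Op::PacketAccess to reducer_traits<Op, GpuDevice>::PacketAccess makes packet support a property of the (reducer, device) pair rather than of the reducer alone: a reducer can advertise a vectorized half2 path on the GPU while remaining scalar on devices where no such path exists. A self-contained sketch of that divergence (illustrative names):

    // The same reducer reports different packet support per device.
    struct CpuLike {};
    struct GpuLike {};
    struct MeanLike { static const bool PacketAccess = false; };

    template <typename Op, typename Device>
    struct dispatch_traits {
      enum { PacketAccess = Op::PacketAccess };  // default: ask the reducer
    };

    template <>
    struct dispatch_traits<MeanLike, GpuLike> {
      enum { PacketAccess = true };  // a vectorized path exists on GPU only
    };

    static_assert(!dispatch_traits<MeanLike, CpuLike>::PacketAccess, "scalar on CPU");
    static_assert(dispatch_traits<MeanLike, GpuLike>::PacketAccess, "packets on GPU");
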
@@ -610,7 +608,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
 #ifdef EIGEN_HAS_CUDA_FP16
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       (internal::is_same<typename Self::CoeffReturnType, float>::value ||
-      (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
+      (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
 #else
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                  internal::is_same<typename Self::CoeffReturnType, float>::value;
@@ -629,7 +627,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
       return true;
     }
 
-    return InnerReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
+    return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
   }
 };
 
@@ -81,7 +81,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
   typedef typename XprType::Index Index;
   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
   typedef DSizes<Index, NumDims> Dimensions;
-  typedef typename XprType::Scalar Scalar;
+  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
 
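
Wrapping XprType::Scalar in internal::remove_const guards the evaluator's scratch buffer: when the scan runs over an expression whose scalar type is const-qualified, a buffer of that type could be allocated but never written. A minimal standalone illustration with std::remove_const:

    #include <type_traits>

    // Compute a writable buffer type from a possibly const-qualified scalar.
    template <typename MaybeConst>
    struct buffer_sketch {
      typedef typename std::remove_const<MaybeConst>::type Scalar;
      Scalar* data;  // writable storage, even if MaybeConst was const
      explicit buffer_sketch(int n) : data(new Scalar[n]) { data[0] = Scalar(); }
      ~buffer_sketch() { delete[] data; }
    };

    static_assert(std::is_same<buffer_sketch<const float>::Scalar, float>::value,
                  "const is stripped before allocation");
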
@@ -106,7 +106,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
       m_output(NULL) {
 
     // Accumulating a scalar isn't supported.
-    EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
+    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
     eigen_assert(m_axis >= 0 && m_axis < NumDims);
 
     // Compute stride of scan axis
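
The extra parentheses around the condition are macro hygiene: depending on how an assertion macro expands its argument, an unparenthesized expression can be re-parsed in the surrounding context, the classic case being a template argument list where a bare > closes the list early. A minimal illustration with a hypothetical macro of that shape (not Eigen's exact definition):

    // The condition is pasted into a template argument list.
    template <bool Condition> struct static_check;  // declared only
    template <> struct static_check<true> {};       // defined only for true

    #define MY_STATIC_ASSERT(COND) (void)sizeof(static_check<COND>)

    enum { NumDims = 3 };

    int main() {
      // MY_STATIC_ASSERT(NumDims > 0);  // broken: '>' ends the <...> early
      MY_STATIC_ASSERT((NumDims > 0));   // parentheses keep the '>' inside
      return 0;
    }
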
@@ -122,7 +122,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
     return m_dimensions;
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
@@ -136,7 +136,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
       return true;
     }
   }
 
   template<int LoadMode>
   EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
     return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
@@ -152,6 +152,10 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
     return m_output[index];
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
+    return TensorOpCost(sizeof(CoeffReturnType), 0, 0);
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     if (m_output != NULL) {
       m_device.deallocate(m_output);
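
The new costPerCoeff reflects the evaluator's steady state: after evalSubExprsIfNeeded materializes the whole scan into m_output, each coefficient access is a single load of sizeof(CoeffReturnType) bytes with no stores and no arithmetic, which is what TensorOpCost(bytes_loaded, bytes_stored, compute_cycles) encodes here. Device schedulers use such costs to size work per thread; a rough sketch of that consumption (illustrative, not Eigen's scheduler):

    // Turn a per-coefficient cost into a coarse work-chunk size.
    struct op_cost_sketch {
      double bytes_loaded, bytes_stored, compute_cycles;
      double total() const { return bytes_loaded + bytes_stored + compute_cycles; }
    };

    inline int coeffs_per_worker(const op_cost_sketch& c, double budget = 1024.0) {
      const double per_coeff = c.total() > 0.0 ? c.total() : 1.0;
      const double n = budget / per_coeff;
      return n < 1.0 ? 1 : static_cast<int>(n);
    }

    // A float scan coefficient costs 4 loaded bytes and nothing else:
    // coeffs_per_worker(op_cost_sketch{sizeof(float), 0.0, 0.0}) == 256
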