From d6d39c7ddb127d91ebfa4ea62e93ea51036f1760 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 7 Jun 2016 14:35:08 -0700 Subject: [PATCH 01/86] Added missing EIGEN_DEVICE_FUNC --- unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index 52cfc2824..d34f1e328 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -148,7 +148,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast(m_impl.data()); } - const TensorEvaluator& impl() const { return m_impl; } + EIGEN_DEVICE_FUNC const TensorEvaluator& impl() const { return m_impl; } protected: TensorEvaluator m_impl; From 8fd57a97f203edac3f7e8681eafe752294386a24 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 7 Jun 2016 18:22:18 -0700 Subject: [PATCH 02/86] Enable the vectorization of adds and mults of fp16 --- Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 51386506f..959dff886 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -28,6 +28,8 @@ template<> struct packet_traits : default_packet_traits AlignedOnScalar = 1, size=2, HasHalfPacket = 0, + HasAdd = 1, + HasMul = 1, HasDiv = 1, HasSqrt = 1, HasRsqrt = 1, From 9dd9d58273362d643eaa0b8f4f16f8aa3d5ef6cd Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Wed, 8 Jun 2016 15:36:42 +0200 Subject: [PATCH 03/86] Copied a regression test from 3.2 branch. 
--- test/geo_homogeneous.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/geo_homogeneous.cpp b/test/geo_homogeneous.cpp index bf63c69ec..305794cdf 100644 --- a/test/geo_homogeneous.cpp +++ b/test/geo_homogeneous.cpp @@ -58,6 +58,8 @@ template void homogeneous(void) T2MatrixType t2 = T2MatrixType::Random(); VERIFY_IS_APPROX(t2 * (v0.homogeneous().eval()), t2 * v0.homogeneous()); VERIFY_IS_APPROX(t2 * (m0.colwise().homogeneous().eval()), t2 * m0.colwise().homogeneous()); + VERIFY_IS_APPROX(t2 * (v0.homogeneous().asDiagonal()), t2 * hv0.asDiagonal()); + VERIFY_IS_APPROX((v0.homogeneous().asDiagonal()) * t2, hv0.asDiagonal() * t2); VERIFY_IS_APPROX((v0.transpose().rowwise().homogeneous().eval()) * t2, v0.transpose().rowwise().homogeneous() * t2); From 9fc8379328dad5fb74249003f56fb608c304ae4d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 8 Jun 2016 16:39:11 +0200 Subject: [PATCH 04/86] Fix extraction of complex eigenvalue pairs in real generalized eigenvalue problems. 
--- .../src/Eigenvalues/GeneralizedEigenSolver.h | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index a9d6790d5..07a9ccf46 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -327,13 +327,33 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp } else { - Scalar p = Scalar(0.5) * (m_matS.coeff(i, i) - m_matS.coeff(i+1, i+1)); - Scalar z = sqrt(abs(p * p + m_matS.coeff(i+1, i) * m_matS.coeff(i, i+1))); - m_alphas.coeffRef(i) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, z); - m_alphas.coeffRef(i+1) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, -z); + // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a triangular 2x2 block T + // From the eigen decomposition of T = U * E * U^-1, + // we can extract the eigenvalues of (U^-1 * S * U) / E + // Here, we can take advantage that E = diag(T), and U = [ 1 T_01 ; 0 T_11-T_00], and U^-1 = [1 -T_11/(T_11-T_00) ; 0 1/(T_11-T_00)]. 
+ // Then taking beta=T_00*T_11*(T_11-T_00), we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00) * (T_11-T_00): + + // T = [a b ; 0 c] + // S = [e f ; g h] + RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i, i+1), c = m_realQZ.matrixT().coeff(i+1, i+1); + RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1); + RealScalar d = c-a; + RealScalar gb = g*b; + Matrix A; + A << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, + g*c , (gb+h*d)*a; + + // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal, + // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00): + + Scalar p = Scalar(0.5) * (A.coeff(i, i) - A.coeff(i+1, i+1)); + Scalar z = sqrt(abs(p * p + A.coeff(i+1, i) * A.coeff(i, i+1))); + m_alphas.coeffRef(i) = ComplexScalar(A.coeff(i+1, i+1) + p, z); + m_alphas.coeffRef(i+1) = ComplexScalar(A.coeff(i+1, i+1) + p, -z); + + m_betas.coeffRef(i) = + m_betas.coeffRef(i+1) = a*c*d; - m_betas.coeffRef(i) = m_realQZ.matrixT().coeff(i,i); - m_betas.coeffRef(i+1) = m_realQZ.matrixT().coeff(i,i); i += 2; } } From df095cab104550a8179c28e93d477406dfab6849 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 8 Jun 2016 18:31:19 +0200 Subject: [PATCH 05/86] Fixes for PARDISO: warnings, and defaults to metis+ in-core mode. 
--- Eigen/src/PardisoSupport/PardisoSupport.h | 35 ++++++++++++++--------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h index 80d914f25..091c3970e 100644 --- a/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/Eigen/src/PardisoSupport/PardisoSupport.h @@ -183,7 +183,7 @@ class PardisoImpl : public SparseSolverBase { if(m_isInitialized) // Factorization ran at least once { - internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, m_size,0, 0, 0, m_perm.data(), 0, + internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, internal::convert_index(m_size),0, 0, 0, m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); m_isInitialized = false; } @@ -194,11 +194,11 @@ class PardisoImpl : public SparseSolverBase m_type = type; bool symmetric = std::abs(m_type) < 10; m_iparm[0] = 1; // No solver default - m_iparm[1] = 3; // use Metis for the ordering - m_iparm[2] = 1; // Numbers of processors, value of OMP_NUM_THREADS + m_iparm[1] = 2; // use Metis for the ordering + m_iparm[2] = 0; // Reserved. Set to zero. (??Numbers of processors, value of OMP_NUM_THREADS??) m_iparm[3] = 0; // No iterative-direct algorithm m_iparm[4] = 0; // No user fill-in reducing permutation - m_iparm[5] = 0; // Write solution into x + m_iparm[5] = 0; // Write solution into x, b is left unchanged m_iparm[6] = 0; // Not in use m_iparm[7] = 2; // Max numbers of iterative refinement steps m_iparm[8] = 0; // Not in use @@ -219,7 +219,8 @@ class PardisoImpl : public SparseSolverBase m_iparm[26] = 0; // No matrix checker m_iparm[27] = (sizeof(RealScalar) == 4) ? 
1 : 0; m_iparm[34] = 1; // C indexing - m_iparm[59] = 1; // Automatic switch between In-Core and Out-of-Core modes + m_iparm[36] = 0; // CSR + m_iparm[59] = 0; // 0 - In-Core ; 1 - Automatic switch between In-Core and Out-of-Core modes ; 2 - Out-of-Core memset(m_pt, 0, sizeof(m_pt)); } @@ -246,7 +247,7 @@ class PardisoImpl : public SparseSolverBase mutable SparseMatrixType m_matrix; mutable ComputationInfo m_info; bool m_analysisIsOk, m_factorizationIsOk; - Index m_type, m_msglvl; + StorageIndex m_type, m_msglvl; mutable void *m_pt[64]; mutable ParameterType m_iparm; mutable IntColVectorType m_perm; @@ -265,10 +266,9 @@ Derived& PardisoImpl::compute(const MatrixType& a) derived().getMatrix(a); Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 12, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 12, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); - manageErrorCode(error); m_analysisIsOk = true; m_factorizationIsOk = true; @@ -287,7 +287,7 @@ Derived& PardisoImpl::analyzePattern(const MatrixType& a) derived().getMatrix(a); Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 11, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 11, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); @@ -306,8 +306,8 @@ Derived& PardisoImpl::factorize(const MatrixType& a) derived().getMatrix(a); - Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 22, m_size, + Index error; + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 22, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); @@ -354,9 +354,9 @@ void 
PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase } Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 33, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 33, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), - m_perm.data(), nrhs, m_iparm.data(), m_msglvl, + m_perm.data(), internal::convert_index(nrhs), m_iparm.data(), m_msglvl, rhs_ptr, x.derived().data()); manageErrorCode(error); @@ -371,6 +371,9 @@ void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase * using the Intel MKL PARDISO library. The sparse matrix A must be squared and invertible. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * * \implsparsesolverconcept @@ -421,6 +424,9 @@ class PardisoLU : public PardisoImpl< PardisoLU > * using the Intel MKL PARDISO library. The sparse matrix A must be selfajoint and positive definite. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam UpLo can be any bitwise combination of Upper, Lower. The default is Upper, meaning only the upper triangular part has to be used. * Upper|Lower can be used to tell both triangular parts can be used as input. @@ -480,6 +486,9 @@ class PardisoLLT : public PardisoImpl< PardisoLLT > * For complex matrices, A can also be symmetric only, see the \a Options template parameter. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. 
To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam Options can be any bitwise combination of Upper, Lower, and Symmetric. The default is Upper, meaning only the upper triangular part has to be used. * Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix. From 0beabb4776b887c25977186b5af6811eb49aaa23 Mon Sep 17 00:00:00 2001 From: Abhijit Kundu Date: Wed, 8 Jun 2016 16:12:04 -0400 Subject: [PATCH 06/86] Fixed type conversion from int --- Eigen/src/Geometry/Transform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 1a06c1e35..073f4dcd1 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -1367,7 +1367,7 @@ struct transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES); Matrix rhs; - rhs.template head() = other; rhs[Dim] = 1; + rhs.template head() = other; rhs[Dim] = typename ResultType::Scalar(1); Matrix res(T.matrix() * rhs); return res.template head(); } From a20d2ec1c02d2629cbfa8871898b64cd22445595 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 16:16:22 +0200 Subject: [PATCH 07/86] Fix shadow variable, and indexing. 
--- Eigen/src/Eigenvalues/GeneralizedEigenSolver.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 07a9ccf46..08caed281 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -339,17 +339,17 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1); RealScalar d = c-a; RealScalar gb = g*b; - Matrix A; - A << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, - g*c , (gb+h*d)*a; + Matrix S2; + S2 << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, + g*c , (gb+h*d)*a; // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal, // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00): - Scalar p = Scalar(0.5) * (A.coeff(i, i) - A.coeff(i+1, i+1)); - Scalar z = sqrt(abs(p * p + A.coeff(i+1, i) * A.coeff(i, i+1))); - m_alphas.coeffRef(i) = ComplexScalar(A.coeff(i+1, i+1) + p, z); - m_alphas.coeffRef(i+1) = ComplexScalar(A.coeff(i+1, i+1) + p, -z); + Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1)); + Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1))); + m_alphas.coeffRef(i) = ComplexScalar(S2.coeff(1,1) + p, z); + m_alphas.coeffRef(i+1) = ComplexScalar(S2.coeff(1,1) + p, -z); m_betas.coeffRef(i) = m_betas.coeffRef(i+1) = a*c*d; From 15890c304edbccedc8a989468ed3fc475f428059 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 16:17:27 +0200 Subject: [PATCH 08/86] Add unit test for non symmetric generalized eigenvalues --- test/eigensolver_generalized_real.cpp | 32 ++++++++++++++++++++------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/test/eigensolver_generalized_real.cpp 
b/test/eigensolver_generalized_real.cpp index a46a2e50e..da14482de 100644 --- a/test/eigensolver_generalized_real.cpp +++ b/test/eigensolver_generalized_real.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2012 Gael Guennebaud +// Copyright (C) 2012-2016 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -10,6 +10,7 @@ #include "main.h" #include #include +#include template void generalized_eigensolver_real(const MatrixType& m) { @@ -21,6 +22,7 @@ template void generalized_eigensolver_real(const MatrixType Index cols = m.cols(); typedef typename MatrixType::Scalar Scalar; + typedef std::complex ComplexScalar; typedef Matrix VectorType; MatrixType a = MatrixType::Random(rows,cols); @@ -31,14 +33,28 @@ template void generalized_eigensolver_real(const MatrixType MatrixType spdB = b.adjoint() * b + b1.adjoint() * b1; // lets compare to GeneralizedSelfAdjointEigenSolver - GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); - GeneralizedEigenSolver eig(spdA, spdB); + { + GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); + GeneralizedEigenSolver eig(spdA, spdB); - VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); + VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); - VectorType realEigenvalues = eig.eigenvalues().real(); - std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); - VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + VectorType realEigenvalues = eig.eigenvalues().real(); + std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); + VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + } + + // non symmetric case: + { + GeneralizedEigenSolver eig(a,b); + for(Index k=0; k tmp = (eig.betas()(k)*a).template cast() - eig.alphas()(k)*b; + 
if(tmp.norm()>(std::numeric_limits::min)()) + tmp /= tmp.norm(); + VERIFY_IS_MUCH_SMALLER_THAN( std::abs(tmp.determinant()), Scalar(1) ); + } + } // regression test for bug 1098 { @@ -57,7 +73,7 @@ void test_eigensolver_generalized_real() s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(s,s)) ); - // some trivial but implementation-wise tricky cases + // some trivial but implementation-wise special cases CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(1,1)) ); CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(2,2)) ); CALL_SUBTEST_3( generalized_eigensolver_real(Matrix()) ); From c1f9ca925405c8fad126f327b4bdca7f983fc96e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 17:11:03 +0200 Subject: [PATCH 09/86] Update RealQZ to reduce 2x2 diagonal block of T corresponding to non reduced diagonal block of S to positive diagonal form. This step involves a real 2x2 SVD problem. The respective routine is thus in src/misc/ to be shared by both EVD and SVD modules. 
--- Eigen/Eigenvalues | 1 + Eigen/SVD | 1 + Eigen/src/Eigenvalues/RealQZ.h | 32 +++++++++++++++++++- Eigen/src/SVD/JacobiSVD.h | 32 -------------------- Eigen/src/misc/RealSvd2x2.h | 54 ++++++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 33 deletions(-) create mode 100644 Eigen/src/misc/RealSvd2x2.h diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues index ea93eb303..216a6d51d 100644 --- a/Eigen/Eigenvalues +++ b/Eigen/Eigenvalues @@ -32,6 +32,7 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include "src/Eigenvalues/Tridiagonalization.h" #include "src/Eigenvalues/RealSchur.h" #include "src/Eigenvalues/EigenSolver.h" diff --git a/Eigen/SVD b/Eigen/SVD index b353f3f54..565d9c90d 100644 --- a/Eigen/SVD +++ b/Eigen/SVD @@ -31,6 +31,7 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include "src/SVD/UpperBidiagonalization.h" #include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index a62071d42..c4715b954 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -552,7 +552,6 @@ namespace Eigen { m_T.coeffRef(l,l-1) = Scalar(0.0); } - template RealQZ& RealQZ::compute(const MatrixType& A_in, const MatrixType& B_in, bool computeQZ) { @@ -616,6 +615,37 @@ namespace Eigen { } // check if we converged before reaching iterations limit m_info = (local_iter j_left, j_right; + internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right); + + // Apply resulting Jacobi rotations + m_T.applyOnTheLeft(i,i+1,j_left); + m_T.applyOnTheRight(i,i+1,j_right); + m_S.applyOnTheLeft(i,i+1,j_left); + m_S.applyOnTheRight(i,i+1,j_right); + m_T(i,i+1) = Scalar(0); + + if(m_computeQZ) { + m_Q.applyOnTheRight(i,i+1,j_left.transpose()); + m_Z.applyOnTheLeft(i,i+1,j_right.transpose()); + } + + i++; + } + } + } + return *this; } // end compute diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 1940c8294..b83fd7a4d 100644 --- 
a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -419,38 +419,6 @@ struct svd_precondition_2x2_block_to_be_real } }; -template -void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, - JacobiRotation *j_left, - JacobiRotation *j_right) -{ - using std::sqrt; - using std::abs; - Matrix m; - m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), - numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); - JacobiRotation rot1; - RealScalar t = m.coeff(0,0) + m.coeff(1,1); - RealScalar d = m.coeff(1,0) - m.coeff(0,1); - if(d == RealScalar(0)) - { - rot1.s() = RealScalar(0); - rot1.c() = RealScalar(1); - } - else - { - // If d!=0, then t/d cannot overflow because the magnitude of the - // entries forming d are not too small compared to the ones forming t. - RealScalar u = t / d; - RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); - rot1.s() = RealScalar(1) / tmp; - rot1.c() = u / tmp; - } - m.applyOnTheLeft(0,1,rot1); - j_right->makeJacobi(m,0,1); - *j_left = rot1 * j_right->transpose(); -} - template struct traits > { diff --git a/Eigen/src/misc/RealSvd2x2.h b/Eigen/src/misc/RealSvd2x2.h new file mode 100644 index 000000000..cdd7777d2 --- /dev/null +++ b/Eigen/src/misc/RealSvd2x2.h @@ -0,0 +1,54 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Benoit Jacob +// Copyright (C) 2013-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_REALSVD2X2_H +#define EIGEN_REALSVD2X2_H + +namespace Eigen { + +namespace internal { + +template +void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, + JacobiRotation *j_left, + JacobiRotation *j_right) +{ + using std::sqrt; + using std::abs; + Matrix m; + m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), + numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); + JacobiRotation rot1; + RealScalar t = m.coeff(0,0) + m.coeff(1,1); + RealScalar d = m.coeff(1,0) - m.coeff(0,1); + if(d == RealScalar(0)) + { + rot1.s() = RealScalar(0); + rot1.c() = RealScalar(1); + } + else + { + // If d!=0, then t/d cannot overflow because the magnitude of the + // entries forming d are not too small compared to the ones forming t. + RealScalar u = t / d; + RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); + rot1.s() = RealScalar(1) / tmp; + rot1.c() = u / tmp; + } + m.applyOnTheLeft(0,1,rot1); + j_right->makeJacobi(m,0,1); + *j_left = rot1 * j_right->transpose(); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_REALSVD2X2_H \ No newline at end of file From 2bd59b0e0d667dcdcb6b070596a1bf023e3e88f1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 17:12:03 +0200 Subject: [PATCH 10/86] Take advantage that T is already diagonal in the extraction of generalized complex eigenvalues. 
--- .../src/Eigenvalues/GeneralizedEigenSolver.h | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 08caed281..9f43fd544 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -327,24 +327,13 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp } else { - // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a triangular 2x2 block T - // From the eigen decomposition of T = U * E * U^-1, - // we can extract the eigenvalues of (U^-1 * S * U) / E - // Here, we can take advantage that E = diag(T), and U = [ 1 T_01 ; 0 T_11-T_00], and U^-1 = [1 -T_11/(T_11-T_00) ; 0 1/(T_11-T_00)]. - // Then taking beta=T_00*T_11*(T_11-T_00), we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00) * (T_11-T_00): + // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a positive diagonal 2x2 block T + // Then taking beta=T_00*T_11, we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00): - // T = [a b ; 0 c] - // S = [e f ; g h] - RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i, i+1), c = m_realQZ.matrixT().coeff(i+1, i+1); - RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1); - RealScalar d = c-a; - RealScalar gb = g*b; - Matrix S2; - S2 << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, - g*c , (gb+h*d)*a; - - // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal, - // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00): + // T = [a 0] + // [0 b] + RealScalar a = m_realQZ.matrixT().coeff(i, i), b = 
m_realQZ.matrixT().coeff(i+1, i+1); + Matrix S2 = m_matS.template block<2,2>(i,i) * Matrix(b,a).asDiagonal(); Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1)); Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1))); @@ -352,7 +341,7 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp m_alphas.coeffRef(i+1) = ComplexScalar(S2.coeff(1,1) + p, -z); m_betas.coeffRef(i) = - m_betas.coeffRef(i+1) = a*c*d; + m_betas.coeffRef(i+1) = a*b; i += 2; } From e2b383632699684d06ae180b3ad85cfb0189973a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 17:13:33 +0200 Subject: [PATCH 11/86] Include recent changesets that played with product's kernel --- bench/perf_monitoring/gemm/changesets.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index fb3e48e99..d00b4603a 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -44,4 +44,8 @@ before-evaluators 7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) 7591:09a8e2186610 # 3.3-alpha1 7650:b0f3c8f43025 # help clang inlining +8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) +8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes +8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path +8985:d935df21a082 # Remove the rotating kernel. 
From aa33446dace833fbf06632e586c80119b3d8ac11 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 08:22:27 -0700 Subject: [PATCH 12/86] Improved support for vectorization of 16-bit floats --- .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 86 +++++++++++++++++++ .../Eigen/CXX11/src/Tensor/TensorMeta.h | 5 ++ .../CXX11/src/Tensor/TensorReductionCuda.h | 8 +- 3 files changed, 95 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 3dd32e9d1..bf52e490f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -84,6 +84,14 @@ struct functor_traits > { }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + // Standard reduction functors template struct SumReducer { @@ -119,6 +127,15 @@ template struct SumReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::type::HasAdd + }; +}; + + template struct MeanReducer { static const bool PacketAccess = packet_traits::HasAdd && !NumTraits::IsInteger; @@ -162,6 +179,15 @@ template struct MeanReducer DenseIndex packetCount_; }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::type::HasAdd + }; +}; + + template struct MaxReducer { static const bool PacketAccess = packet_traits::HasMax; @@ -195,6 +221,15 @@ template struct MaxReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::type::HasMax + }; +}; + + template struct MinReducer { static const bool PacketAccess = packet_traits::HasMin; @@ -228,6 +263,14 @@ template struct MinReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::type::HasMin + }; +}; + template struct ProdReducer { @@ -263,6 
+306,14 @@ template struct ProdReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::MulCost, + PacketAccess = PacketType::type::HasMul + }; +}; + struct AndReducer { @@ -280,6 +331,15 @@ struct AndReducer } }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + struct OrReducer { static const bool PacketAccess = false; static const bool IsStateful = false; @@ -295,6 +355,15 @@ struct OrReducer { } }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + // Argmin/Argmax reducers template struct ArgMaxTupleReducer { @@ -312,6 +381,15 @@ template struct ArgMaxTupleReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + + template struct ArgMinTupleReducer { static const bool PacketAccess = false; @@ -328,6 +406,14 @@ template struct ArgMinTupleReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + // Random number generation namespace { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index b1645d56f..82a905a65 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -54,6 +54,11 @@ struct PacketType { // For CUDA packet types when using a GpuDevice #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +template <> + struct PacketType { + typedef half2 type; + static const int size = 2; + }; template <> struct PacketType { typedef float4 type; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index e82530955..1b4fdd03f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -331,7 +331,7 @@ struct FullReducer { #ifdef EIGEN_HAS_CUDA_FP16 
static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || - (internal::is_same::value && Op::PacketAccess)); + (internal::is_same::value && reducer_traits::PacketAccess)); #else static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same::value; @@ -346,7 +346,7 @@ struct FullReducer { return; } - FullReductionLauncher::run(self, reducer, device, output, num_coeffs); + FullReductionLauncher::PacketAccess>::run(self, reducer, device, output, num_coeffs); } }; @@ -608,7 +608,7 @@ struct InnerReducer { #ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || - (internal::is_same::value && Op::PacketAccess)); + (internal::is_same::value && reducer_traits::PacketAccess)); #else static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same::value; @@ -627,7 +627,7 @@ struct InnerReducer { return true; } - return InnerReductionLauncher::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); + return InnerReductionLauncher::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); } }; From 8f92c26319a6e06cce6d0ba2a252521c8096a2c0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 08:23:42 -0700 Subject: [PATCH 13/86] Improved code formatting --- unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 61df8032d..0d084141d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -122,7 +122,7 @@ struct TensorEvaluator, Device> { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { - return m_dimensions; + return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { From 
14a112ee153f7d8554c3a8848e8a0461c7b82f13 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 08:25:22 -0700 Subject: [PATCH 14/86] Use signed integers more consistently to encode the number of threads to use to evaluate a tensor expression. --- .../CXX11/src/Tensor/TensorContractionThreadPool.h | 12 ++++++------ .../Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index a60a17049..ee16cde9b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -202,7 +202,7 @@ struct TensorEvaluator::numThreads( + int num_threads = TensorCostModel::numThreads( static_cast(n) * m, cost, this->m_device.numThreads()); // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost @@ -301,7 +301,7 @@ struct TensorEvaluator f) const { typedef TensorCostModel CostModel; if (n <= 1 || numThreads() == 1 || - CostModel::numThreads(n, cost, numThreads()) == 1) { + CostModel::numThreads(n, cost, static_cast(numThreads())) == 1) { f(0, n); return; } @@ -242,7 +242,7 @@ struct ThreadPoolDevice { // Recursively divide size into halves until we reach block_size. // Division code rounds mid to block_size, so we are guaranteed to get // block_count leaves that do actual computations. 
- Barrier barrier(block_count); + Barrier barrier(static_cast(block_count)); std::function handleRange; handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) { if (last - first <= block_size) { @@ -268,7 +268,7 @@ struct ThreadPoolDevice { private: ThreadPoolInterface* pool_; - size_t num_threads_; + int num_threads_; }; From 66796e843df723eeac04d6dc725f6a8b27a574ba Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 08:50:01 -0700 Subject: [PATCH 15/86] Fixed definition of some of the reducer_traits --- unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index bf52e490f..e6ff70460 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -131,7 +131,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = PacketType::type::HasAdd + PacketAccess = packet_traits::type>::HasAdd }; }; @@ -183,7 +183,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = PacketType::type::HasAdd + PacketAccess = packet_traits::type>::HasAdd }; }; @@ -225,7 +225,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = PacketType::type::HasMax + PacketAccess = packet_traits::type>::HasMax }; }; @@ -267,7 +267,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = PacketType::type::HasMin + PacketAccess = packet_traits::type>::HasMin }; }; @@ -310,7 +310,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::MulCost, - PacketAccess = PacketType::type::HasMul + PacketAccess = packet_traits::type>::HasMul }; }; From 37638dafd71e39407d22d4269b32d1c73b84feb8 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 
10:29:52 -0700 Subject: [PATCH 16/86] Simplified the code that dispatches vectorized reductions on GPU --- .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 10 ++--- .../Eigen/CXX11/src/Tensor/TensorMeta.h | 38 ++++++++++++------- .../CXX11/src/Tensor/TensorReductionCuda.h | 2 +- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index e6ff70460..a8e48fced 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -131,7 +131,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasAdd + PacketAccess = PacketType::HasAdd }; }; @@ -183,7 +183,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasAdd + PacketAccess = PacketType::HasAdd }; }; @@ -225,7 +225,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasMax + PacketAccess = PacketType::HasMax }; }; @@ -267,7 +267,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasMin + PacketAccess = PacketType::HasMin }; }; @@ -310,7 +310,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::MulCost, - PacketAccess = packet_traits::type>::HasMul + PacketAccess = PacketType::HasMul }; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 82a905a65..0f3778e6e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -47,27 +47,39 @@ template <> struct max_n_1<0> { // Default packet types template -struct PacketType { +struct PacketType : internal::packet_traits { typedef typename internal::packet_traits::type type; - enum { size = 
internal::unpacket_traits::size }; }; // For CUDA packet types when using a GpuDevice #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template <> - struct PacketType { +struct PacketType { typedef half2 type; static const int size = 2; - }; -template <> -struct PacketType { - typedef float4 type; - static const int size = 4; -}; -template <> -struct PacketType { - typedef double2 type; - static const int size = 2; + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasArg = 0, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0, + + HasDiv = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasExp = 1, + HasLog = 1, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 1, + }; }; #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 1b4fdd03f..d9bbcd858 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -328,7 +328,7 @@ struct FullReducer { // Unfortunately nvidia doesn't support well exotic types such as complex, // so reduce the scope of the optimized version of the code to the simple case // of floats and half floats. 
- #ifdef EIGEN_HAS_CUDA_FP16 +#ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || (internal::is_same::value && reducer_traits::PacketAccess)); From 1fc2746417c8b4bf1645c703b1f99b1871c8d16e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 22:52:37 +0200 Subject: [PATCH 17/86] Make Arrays's ctor/assignment noexcept --- Eigen/src/Core/Array.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index c0af4aa9d..7c2e0de16 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -149,7 +149,7 @@ class Array #if EIGEN_HAS_RVALUE_REFERENCES EIGEN_DEVICE_FUNC - Array(Array&& other) + Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) : Base(std::move(other)) { Base::_check_template_params(); @@ -157,7 +157,7 @@ class Array Base::_set_noalias(other); } EIGEN_DEVICE_FUNC - Array& operator=(Array&& other) + Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { other.swap(*this); return *this; From bd212438217dc3e169a35052f78e2e41a7ce3a3d Mon Sep 17 00:00:00 2001 From: Sean Templeton Date: Fri, 3 Jun 2016 10:51:35 -0500 Subject: [PATCH 18/86] Fix compile errors initializing packets on ARM DS-5 5.20 The ARM DS-5 5.20 compiler fails compiling with the following errors: "src/Core/arch/NEON/PacketMath.h", line 113: Error: #146: too many initializer values Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); ^ "src/Core/arch/NEON/PacketMath.h", line 118: Error: #146: too many initializer values Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); ^ "src/Core/arch/NEON/Complex.h", line 30: Error: #146: too many initializer values static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); ^ "src/Core/arch/NEON/Complex.h", line 31: Error: #146: too many initializer values static uint32x2_t 
p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); ^ The vectors are implemented as two doubles, hence the too many initializer values error. Changed the code to use intrinsic load functions which all compilers implementing NEON should have. --- Eigen/src/Core/arch/NEON/Complex.h | 5 +++-- Eigen/src/Core/arch/NEON/PacketMath.h | 15 +++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index d2d467936..234f29b80 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -14,8 +14,9 @@ namespace Eigen { namespace internal { -static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); -static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); +const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; +static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA ); +static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA ); //---------- float ---------- struct Packet2cf diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index deb2d7e42..fa16bc9c8 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -51,15 +51,12 @@ typedef uint32x4_t Packet4ui; #if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG //Special treatment for Apple's llvm-gcc, its NEON packet types are unions - #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}} - #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}} + #define EIGEN_INIT_NEON_PACKET2D(X, Y) {{X, Y}} #else //Default initializer for packets - #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y} - #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W} + #define EIGEN_INIT_NEON_PACKET2D(X, Y) {X, Y} #endif - // arm64 does have the pld instruction. 
If available, let's trust the __builtin_prefetch built-in function // which available on LLVM and GCC (at least) #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC @@ -122,12 +119,14 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { - Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); + const float32_t f[] = {0, 1, 2, 3}; + Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { - Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); + const int32_t i[] = {0, 1, 2, 3}; + Packet4i countdown = vld1q_s32(i); return vaddq_s32(pset1(a), countdown); } @@ -585,7 +584,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { r template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { - Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1); + Packet2d countdown = EIGEN_INIT_NEON_PACKET2D(0, 1); return vaddq_f64(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); } From 76308e7fd277ad962a87724040670da827a27db4 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 3 Jun 2016 16:28:58 -0700 Subject: [PATCH 19/86] Add CurrentThreadId and NumThreads methods to Eigen threadpools and TensorDeviceThreadPool. 
--- .../CXX11/src/Tensor/TensorDeviceThreadPool.h | 4 +++ .../src/ThreadPool/NonBlockingThreadPool.h | 26 +++++++++++---- .../CXX11/src/ThreadPool/SimpleThreadPool.h | 32 +++++++++++++++++-- .../src/ThreadPool/ThreadPoolInterface.h | 7 ++++ .../test/cxx11_non_blocking_thread_pool.cpp | 5 +++ 5 files changed, 65 insertions(+), 9 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index d31b0ad38..90fded8ad 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -172,6 +172,10 @@ struct ThreadPoolDevice { pool_->Schedule(func); } + EIGEN_STRONG_INLINE size_t currentThreadId() const { + return pool_->CurrentThreadId(); + } + // parallelFor executes f with [0, n) arguments in parallel and waits for // completion. F accepts a half-open interval [first, last). // Block size is choosen based on the iteration cost and resulting parallel diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index c094563b7..1465878b7 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -74,7 +74,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { PerThread* pt = GetPerThread(); if (pt->pool == this) { // Worker thread of this pool, push onto the thread's queue. - Queue* q = queues_[pt->index]; + Queue* q = queues_[pt->thread_id]; t = q->PushFront(std::move(t)); } else { // A free-standing thread (or worker of another pool), push onto a random @@ -95,13 +95,27 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { env_.ExecuteTask(t); // Push failed, execute directly. 
} + size_t NumThreads() const final { + return threads_.size(); + } + + size_t CurrentThreadId() const { + const PerThread* pt = + const_cast(this)->GetPerThread(); + if (pt->pool == this) { + return static_cast(pt->thread_id); + } else { + return threads_.size(); + } + } + private: typedef typename Environment::EnvThread Thread; struct PerThread { bool inited; NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads. - unsigned index; // Worker thread index in pool. + unsigned thread_id; // Worker thread index in pool. unsigned rand; // Random generator state. }; @@ -116,12 +130,12 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { EventCount ec_; // Main worker thread loop. - void WorkerLoop(unsigned index) { + void WorkerLoop(unsigned thread_id) { PerThread* pt = GetPerThread(); pt->pool = this; - pt->index = index; - Queue* q = queues_[index]; - EventCount::Waiter* waiter = &waiters_[index]; + pt->thread_id = thread_id; + Queue* q = queues_[thread_id]; + EventCount::Waiter* waiter = &waiters_[thread_id]; for (;;) { Task t = q->PopFront(); if (!t.f) { diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h index 17fd1658b..fde80afdf 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h @@ -24,7 +24,7 @@ class SimpleThreadPoolTempl : public ThreadPoolInterface { explicit SimpleThreadPoolTempl(int num_threads, Environment env = Environment()) : env_(env), threads_(num_threads), waiters_(num_threads) { for (int i = 0; i < num_threads; i++) { - threads_.push_back(env.CreateThread([this]() { WorkerLoop(); })); + threads_.push_back(env.CreateThread([this, i]() { WorkerLoop(i); })); } } @@ -55,7 +55,7 @@ class SimpleThreadPoolTempl : public ThreadPoolInterface { // Schedule fn() for execution in the pool of threads. 
The functions are // executed in the order in which they are scheduled. - void Schedule(std::function fn) { + void Schedule(std::function fn) final { Task t = env_.CreateTask(std::move(fn)); std::unique_lock l(mu_); if (waiters_.empty()) { @@ -69,9 +69,25 @@ class SimpleThreadPoolTempl : public ThreadPoolInterface { } } + size_t NumThreads() const final { + return threads_.size(); + } + + size_t CurrentThreadId() const final { + const PerThread* pt = this->GetPerThread(); + if (pt->pool == this) { + return pt->thread_id; + } else { + return threads_.size(); + } + } + protected: - void WorkerLoop() { + void WorkerLoop(size_t thread_id) { std::unique_lock l(mu_); + PerThread* pt = GetPerThread(); + pt->pool = this; + pt->thread_id = thread_id; Waiter w; Task t; while (!exiting_) { @@ -111,6 +127,11 @@ class SimpleThreadPoolTempl : public ThreadPoolInterface { bool ready; }; + struct PerThread { + ThreadPoolTempl* pool; // Parent pool, or null for normal threads. + size_t thread_id; // Worker thread index in pool. + }; + Environment env_; std::mutex mu_; MaxSizeVector threads_; // All threads @@ -118,6 +139,11 @@ class SimpleThreadPoolTempl : public ThreadPoolInterface { std::deque pending_; // Queue of pending work std::condition_variable empty_; // Signaled on pending_.empty() bool exiting_ = false; + + PerThread* GetPerThread() const { + static EIGEN_THREAD_LOCAL PerThread per_thread; + return &per_thread; + } }; typedef SimpleThreadPoolTempl SimpleThreadPool; diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h index 38b40aceb..b1beccdde 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h @@ -18,6 +18,13 @@ class ThreadPoolInterface { public: virtual void Schedule(std::function fn) = 0; + // Returns the number of threads in the pool. 
+ virtual size_t NumThreads() const = 0; + + // Returns a logical thread index between 0 and NumThreads() - 1 if called + // from one of the threads in the pool. Returns NumThreads() otherwise. + virtual size_t CurrentThreadId() const = 0; + virtual ~ThreadPoolInterface() {} }; diff --git a/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/unsupported/test/cxx11_non_blocking_thread_pool.cpp index 6569218c4..844a1fbf4 100644 --- a/unsupported/test/cxx11_non_blocking_thread_pool.cpp +++ b/unsupported/test/cxx11_non_blocking_thread_pool.cpp @@ -27,6 +27,8 @@ static void test_parallelism() // Test we never-ever fail to match available tasks with idle threads. const int kThreads = 16; // code below expects that this is a multiple of 4 NonBlockingThreadPool tp(kThreads); + VERIFY_IS_EQUAL(tp.NumThreads(), kThreads); + VERIFY_IS_EQUAL(tp.CurrentThreadId(), kThreads); for (int iter = 0; iter < 100; ++iter) { std::atomic running(0); std::atomic done(0); @@ -34,6 +36,9 @@ static void test_parallelism() // Schedule kThreads tasks and ensure that they all are running. 
for (int i = 0; i < kThreads; ++i) { tp.Schedule([&]() { + const size_t thread_id = tp.CurrentThreadId(); + VERIFY_GE(thread_id, 0); + VERIFY_LE(thread_id, kThreads - 1); running++; while (phase < 1) { } From f1f2ff8208f82680aabd9e191810d0cd10be9048 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 3 Jun 2016 18:06:37 -0700 Subject: [PATCH 20/86] size_t -> int --- .../CXX11/src/Tensor/TensorDeviceThreadPool.h | 2 +- .../src/ThreadPool/NonBlockingThreadPool.h | 18 +++++++++--------- .../CXX11/src/ThreadPool/SimpleThreadPool.h | 16 ++++++++-------- .../CXX11/src/ThreadPool/ThreadPoolInterface.h | 4 ++-- .../test/cxx11_non_blocking_thread_pool.cpp | 2 +- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 90fded8ad..9073c611a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -172,7 +172,7 @@ struct ThreadPoolDevice { pool_->Schedule(func); } - EIGEN_STRONG_INLINE size_t currentThreadId() const { + EIGEN_STRONG_INLINE int currentThreadId() const { return pool_->CurrentThreadId(); } diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index 1465878b7..8bc986c84 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -95,17 +95,17 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { env_.ExecuteTask(t); // Push failed, execute directly. 
} - size_t NumThreads() const final { - return threads_.size(); + int NumThreads() const final { + return static_cast(threads_.size()); } - size_t CurrentThreadId() const { + int CurrentThreadId() const { const PerThread* pt = const_cast(this)->GetPerThread(); if (pt->pool == this) { - return static_cast(pt->thread_id); + return pt->thread_id; } else { - return threads_.size(); + return NumThreads(); } } @@ -114,9 +114,9 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { struct PerThread { bool inited; - NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads. - unsigned thread_id; // Worker thread index in pool. - unsigned rand; // Random generator state. + NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads. + int thread_id; // Worker thread index in pool. + unsigned rand; // Random generator state. }; Environment env_; @@ -130,7 +130,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { EventCount ec_; // Main worker thread loop. 
- void WorkerLoop(unsigned thread_id) { + void WorkerLoop(int thread_id) { PerThread* pt = GetPerThread(); pt->pool = this; pt->thread_id = thread_id; diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h index fde80afdf..36eb6950f 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h @@ -69,21 +69,21 @@ class SimpleThreadPoolTempl : public ThreadPoolInterface { } } - size_t NumThreads() const final { - return threads_.size(); + int NumThreads() const final { + return static_cast(threads_.size()); } - size_t CurrentThreadId() const final { + int CurrentThreadId() const final { const PerThread* pt = this->GetPerThread(); if (pt->pool == this) { return pt->thread_id; } else { - return threads_.size(); + return NumThreads(); } } protected: - void WorkerLoop(size_t thread_id) { + void WorkerLoop(int thread_id) { std::unique_lock l(mu_); PerThread* pt = GetPerThread(); pt->pool = this; @@ -129,15 +129,15 @@ class SimpleThreadPoolTempl : public ThreadPoolInterface { struct PerThread { ThreadPoolTempl* pool; // Parent pool, or null for normal threads. - size_t thread_id; // Worker thread index in pool. + int thread_id; // Worker thread index in pool. }; Environment env_; std::mutex mu_; MaxSizeVector threads_; // All threads MaxSizeVector waiters_; // Stack of waiting threads. 
- std::deque pending_; // Queue of pending work - std::condition_variable empty_; // Signaled on pending_.empty() + std::deque pending_; // Queue of pending work + std::condition_variable empty_; // Signaled on pending_.empty() bool exiting_ = false; PerThread* GetPerThread() const { diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h index b1beccdde..569cd4bc8 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h @@ -19,11 +19,11 @@ class ThreadPoolInterface { virtual void Schedule(std::function fn) = 0; // Returns the number of threads in the pool. - virtual size_t NumThreads() const = 0; + virtual int NumThreads() const = 0; // Returns a logical thread index between 0 and NumThreads() - 1 if called // from one of the threads in the pool. Returns NumThreads() otherwise. - virtual size_t CurrentThreadId() const = 0; + virtual int CurrentThreadId() const = 0; virtual ~ThreadPoolInterface() {} }; diff --git a/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/unsupported/test/cxx11_non_blocking_thread_pool.cpp index 844a1fbf4..6e4e5cbab 100644 --- a/unsupported/test/cxx11_non_blocking_thread_pool.cpp +++ b/unsupported/test/cxx11_non_blocking_thread_pool.cpp @@ -36,7 +36,7 @@ static void test_parallelism() // Schedule kThreads tasks and ensure that they all are running. 
for (int i = 0; i < kThreads; ++i) { tp.Schedule([&]() { - const size_t thread_id = tp.CurrentThreadId(); + const int thread_id = tp.CurrentThreadId(); VERIFY_GE(thread_id, 0); VERIFY_LE(thread_id, kThreads - 1); running++; From 5b95b4daf9933063c57c24c2338b043ec257d461 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 5 Jun 2016 12:57:48 -0700 Subject: [PATCH 21/86] Moved static assertions into the class constructor to make the code more portable --- Eigen/src/Core/CwiseTernaryOp.h | 41 +++++++++++---------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/Eigen/src/Core/CwiseTernaryOp.h b/Eigen/src/Core/CwiseTernaryOp.h index fe71c07cf..9f3576fec 100644 --- a/Eigen/src/Core/CwiseTernaryOp.h +++ b/Eigen/src/Core/CwiseTernaryOp.h @@ -34,22 +34,6 @@ struct traits > { typedef typename result_of::type Scalar; - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageKind, - typename internal::traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageKind, - typename internal::traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageIndex, - typename internal::traits::StorageIndex>::value), - STORAGE_INDEX_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageIndex, - typename internal::traits::StorageIndex>::value), - STORAGE_INDEX_MUST_MATCH) typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::StorageIndex StorageIndex; @@ -100,18 +84,8 @@ template ::StorageKind>, - internal::no_assignment_operator { - EIGEN_STATIC_ASSERT( - (internal::is_same< - typename internal::traits::StorageKind, - typename internal::traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same< - typename internal::traits::StorageKind, - typename internal::traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - + internal::no_assignment_operator +{ public: typedef typename 
internal::remove_all::type Arg1; typedef typename internal::remove_all::type Arg2; @@ -137,6 +111,17 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl< // require the sizes to match EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3) + + // The index types should match + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() && a1.rows() == a3.rows() && a1.cols() == a3.cols()); } From 1f1e0b9e30a175c7a3197ffc87898404dda7c45e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 5 Jun 2016 12:59:11 -0700 Subject: [PATCH 22/86] Silenced compilation warning --- Eigen/src/Core/SpecialFunctions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index 0dd9a1dc3..a657cb854 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -1050,8 +1050,8 @@ struct betainc_impl { template struct betainc_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x) { - /* betaincf.c + static EIGEN_STRONG_INLINE Scalar run(Scalar, Scalar, Scalar) { + /* betaincf.c * * Incomplete beta integral * From 66e99ab6a1444d8e3d47211e4540837e6b982a3a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 6 Jun 2016 15:11:41 +0200 Subject: [PATCH 23/86] Relax mixing-type constraints for binary coefficient-wise operators: - Replace internal::scalar_product_traits by Eigen::ScalarBinaryOpTraits - Remove the "functor_is_product_like" helper (was pretty ugly) - Currently, OP is not used, but it is available to the user for fine grained tuning - Currently, only the 
following operators have been generalized: *,/,+,-,=,*=,/=,+=,-= - TODO: generalize all other binray operators (comparisons,pow,etc.) - TODO: handle "scalar op array" operators (currently only * is handled) - TODO: move the handling of the "void" scalar type to ScalarBinaryOpTraits --- Eigen/src/Core/ArrayBase.h | 4 +- Eigen/src/Core/AssignEvaluator.h | 12 ++-- Eigen/src/Core/CwiseBinaryOp.h | 4 +- Eigen/src/Core/DiagonalMatrix.h | 6 +- Eigen/src/Core/Dot.h | 12 ++-- Eigen/src/Core/EigenBase.h | 4 +- Eigen/src/Core/MatrixBase.h | 4 +- Eigen/src/Core/NoAlias.h | 6 +- Eigen/src/Core/PlainObjectBase.h | 2 +- Eigen/src/Core/Product.h | 3 +- Eigen/src/Core/ProductEvaluators.h | 55 ++++++++-------- Eigen/src/Core/Redux.h | 4 +- Eigen/src/Core/Ref.h | 2 +- Eigen/src/Core/SelfCwiseBinaryOp.h | 10 +-- Eigen/src/Core/Solve.h | 13 ++-- Eigen/src/Core/TriangularMatrix.h | 20 +++--- Eigen/src/Core/VectorwiseOp.h | 4 +- Eigen/src/Core/functors/AssignmentFunctors.h | 54 ++++++++-------- Eigen/src/Core/functors/BinaryFunctors.h | 64 ++++++++----------- .../Core/products/GeneralBlockPanelKernel.h | 4 +- Eigen/src/Core/products/GeneralMatrixMatrix.h | 4 +- .../products/GeneralMatrixMatrixTriangular.h | 4 +- Eigen/src/Core/products/GeneralMatrixVector.h | 4 +- .../Core/products/TriangularMatrixVector.h | 4 +- Eigen/src/Core/util/ForwardDeclarations.h | 5 +- Eigen/src/Core/util/Macros.h | 4 +- Eigen/src/Core/util/Meta.h | 64 +++++++++++-------- Eigen/src/Core/util/XprHelper.h | 10 +-- Eigen/src/Geometry/AlignedBox.h | 6 +- Eigen/src/Geometry/Homogeneous.h | 12 ++-- Eigen/src/Householder/HouseholderSequence.h | 2 +- .../IterativeLinearSolvers/SolveWithGuess.h | 4 +- Eigen/src/LU/FullPivLU.h | 6 +- Eigen/src/LU/InverseImpl.h | 6 +- Eigen/src/LU/PartialPivLU.h | 6 +- Eigen/src/QR/ColPivHouseholderQR.h | 4 +- .../src/QR/CompleteOrthogonalDecomposition.h | 4 +- Eigen/src/QR/FullPivHouseholderQR.h | 4 +- Eigen/src/SparseCore/SparseAssign.h | 20 +++--- 
Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 20 +++--- Eigen/src/SparseCore/SparseDenseProduct.h | 4 +- Eigen/src/SparseCore/SparseMatrix.h | 4 +- Eigen/src/SparseCore/SparseMatrixBase.h | 2 +- Eigen/src/SparseCore/SparseProduct.h | 12 ++-- Eigen/src/SparseCore/SparseSelfAdjointView.h | 10 +-- Eigen/src/SparseQR/SparseQR.h | 8 +-- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 4 +- Eigen/src/plugins/CommonCwiseBinaryOps.h | 18 +++++- Eigen/src/plugins/CommonCwiseUnaryOps.h | 6 +- Eigen/src/plugins/MatrixCwiseBinaryOps.h | 4 +- blas/PackedTriangularMatrixVector.h | 4 +- test/array.cpp | 2 +- test/array_for_matrix.cpp | 2 +- test/mixingtypes.cpp | 32 +++++++--- test/vectorization_logic.cpp | 7 +- .../Eigen/src/AutoDiff/AutoDiffScalar.h | 56 ++++++++-------- .../KroneckerProduct/KroneckerTensorProduct.h | 4 +- 57 files changed, 345 insertions(+), 314 deletions(-) diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 3d9c37bf6..62851a0c2 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -176,7 +176,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator-=(const ArrayBase &other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -189,7 +189,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator+=(const ArrayBase& other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 4b914ac0c..f966724cc 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -687,7 +687,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const 
SrcXprType& src) { - call_dense_assignment_loop(dst, src, internal::assign_op()); + call_dense_assignment_loop(dst, src, internal::assign_op()); } /*************************************************************************** @@ -722,13 +722,13 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(Dst& dst, const Src& src) { - call_assignment(dst, src, internal::assign_op()); + call_assignment(dst, src, internal::assign_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) { - call_assignment(dst, src, internal::assign_op()); + call_assignment(dst, src, internal::assign_op()); } // Deal with "assume-aliasing" @@ -787,7 +787,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias(Dst& dst, const Src& src) { - call_assignment_no_alias(dst, src, internal::assign_op()); + call_assignment_no_alias(dst, src, internal::assign_op()); } template @@ -809,7 +809,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) { - call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); } // forward declaration @@ -838,7 +838,7 @@ template< typename DstXprType, typename SrcXprType, typename Functor, typename S struct Assignment { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); src.evalTo(dst); diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index 39820fd7d..aa3297354 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -160,7 +160,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator-=(const MatrixBase 
&other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -173,7 +173,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator+=(const MatrixBase& other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 5a9e3abd4..aa619dd5c 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -320,16 +320,16 @@ template<> struct AssignmentKind { typedef Diagonal2De template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst.setZero(); dst.diagonal() = src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) { dst.diagonal() += src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) { dst.diagonal() -= src.diagonal(); } }; diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index f3c869635..1d7f2262e 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -28,22 +28,24 @@ template struct dot_nocheck { - typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC static inline 
ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + return a.template binaryExpr(b).sum(); } }; template struct dot_nocheck { - typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.transpose().template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + return a.transpose().template binaryExpr(b).sum(); } }; @@ -62,7 +64,7 @@ struct dot_nocheck template template EIGEN_DEVICE_FUNC -typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType +typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index ba8e09674..f76995af9 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -138,7 +138,7 @@ template template Derived& DenseBase::operator+=(const EigenBase &other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -146,7 +146,7 @@ template template Derived& DenseBase::operator-=(const EigenBase &other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index b8b7f458f..f63505fef 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -193,7 +193,7 @@ template class MatrixBase template EIGEN_DEVICE_FUNC - typename 
internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType + typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType dot(const MatrixBase& other) const; EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; @@ -381,7 +381,7 @@ template class MatrixBase #ifndef EIGEN_PARSED_BY_DOXYGEN /// \internal helper struct to form the return type of the cross product template struct cross_product_return_type { - typedef typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; typedef Matrix type; }; #endif // EIGEN_PARSED_BY_DOXYGEN diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h index ffb673cee..33908010b 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -39,7 +39,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); return m_expression; } @@ -47,7 +47,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); return m_expression; } @@ -55,7 +55,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); return m_expression; } diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 570dbd53b..64f5eb052 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ 
b/Eigen/src/Core/PlainObjectBase.h @@ -718,7 +718,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type //_resize_to_match(other); // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // it wouldn't allow to copy a row-vector into a column-vector. - internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); return this->derived(); } diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 8aa1de081..bad289761 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -18,11 +18,12 @@ namespace internal { // Determine the scalar of Product. This is normally the same as Lhs::Scalar times // Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor. +// TODO: this could be removed once ScalarBinaryOpTraits handles void. template::Shape, typename RhsShape = typename evaluator_traits::Shape > struct product_result_scalar { - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; }; template diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index cc7166062..7f041e5dd 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -124,12 +124,12 @@ protected: // Dense = Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, +struct Assignment, internal::assign_op::Scalar>, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we handle 
nested_eval here? generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); @@ -138,12 +138,12 @@ struct Assignment, internal::assign_op -struct Assignment, internal::add_assign_op, Dense2Dense, +struct Assignment, internal::add_assign_op::Scalar>, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::addTo(dst, src.lhs(), src.rhs()); @@ -152,12 +152,12 @@ struct Assignment, internal::add_assign_op< // Dense -= Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::sub_assign_op, Dense2Dense, +struct Assignment, internal::sub_assign_op::Scalar>, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::subTo(dst, src.lhs(), src.rhs()); @@ -187,37 +187,38 @@ struct Assignment" as well. 
template -struct evaluator_assume_aliasing, const OtherXpr, +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, const Product >, DenseShape > { static const bool value = true; }; -template +template struct assignment_from_xpr_plus_product { - typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; + typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; + template static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const Func1& func) + void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/) { - call_assignment_no_alias(dst, src.lhs(), func); + call_assignment_no_alias(dst, src.lhs(), Func1()); call_assignment_no_alias(dst, src.rhs(), Func2()); } }; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::assign_op, internal::add_assign_op > +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> +struct Assignment, const OtherXpr, + const Product >, internal::assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, internal::assign_op, internal::add_assign_op > {}; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::add_assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::add_assign_op, internal::add_assign_op > +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> +struct Assignment, const OtherXpr, + const Product >, internal::add_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, internal::add_assign_op, internal::add_assign_op > {}; 
-template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::sub_assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::sub_assign_op, internal::sub_assign_op > +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> +struct Assignment, const OtherXpr, + const Product >, internal::sub_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, internal::sub_assign_op, internal::sub_assign_op > {}; //---------------------------------------- @@ -369,21 +370,21 @@ struct generic_product_impl { // Same as: dst.noalias() = lhs.lazyProduct(rhs); // but easier on the compiler side - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); } template static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() += lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); } template static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() -= lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); } // template @@ -735,7 +736,7 @@ template { - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; public: enum { CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 7984cd6e1..ec969d9b9 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ 
-450,7 +450,7 @@ DenseBase::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(0); - return derived().redux(Eigen::internal::scalar_sum_op()); + return derived().redux(Eigen::internal::scalar_sum_op()); } /** \returns the mean of all coefficients of *this @@ -465,7 +465,7 @@ DenseBase::mean() const #pragma warning push #pragma warning ( disable : 2259 ) #endif - return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); #ifdef __INTEL_COMPILER #pragma warning pop #endif diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 6e94181f3..17065fdd5 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -262,7 +262,7 @@ template class Ref< template EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { - internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); + internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); Base::construct(m_object); } diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 78fff1549..719ed72a5 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -12,11 +12,13 @@ namespace Eigen { +// TODO generalize the scalar type of 'other' + template EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); return derived(); } @@ -24,7 +26,7 @@ template EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), 
PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); return derived(); } @@ -32,7 +34,7 @@ template EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); return derived(); } @@ -40,7 +42,7 @@ template EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); return derived(); } diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index ba2ee53b8..038ad5b11 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -134,10 +134,10 @@ protected: // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef Solve SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? 
src.dec()._solve_impl(src.rhs(), dst); @@ -146,10 +146,10 @@ struct Assignment, internal::assign_op -struct Assignment,RhsType>, internal::assign_op, Dense2Dense, Scalar> +struct Assignment,RhsType>, internal::assign_op, Dense2Dense, Scalar> { typedef Solve,RhsType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); } @@ -157,10 +157,11 @@ struct Assignment,RhsType>, internal: // Specialization for "dst = dec.adjoint().solve(rhs)" template -struct Assignment, const Transpose >,RhsType>, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, const Transpose >,RhsType>, + internal::assign_op, Dense2Dense, Scalar> { typedef Solve, const Transpose >,RhsType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); } diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 5c5e5028e..8731e9127 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -367,14 +367,14 @@ template class TriangularViewImpl<_Mat template EIGEN_DEVICE_FUNC TriangularViewType& operator+=(const DenseBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); return derived(); } /** \sa MatrixBase::operator-=() */ template EIGEN_DEVICE_FUNC TriangularViewType& operator-=(const DenseBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), 
internal::sub_assign_op()); return derived(); } @@ -552,7 +552,7 @@ template inline TriangularView& TriangularViewImpl::operator=(const MatrixBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op()); return derived(); } @@ -804,7 +804,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src) { - call_triangular_assignment_loop(dst, src, internal::assign_op()); + call_triangular_assignment_loop(dst, src, internal::assign_op()); } template<> struct AssignmentKind { typedef Triangular2Triangular Kind; }; @@ -933,10 +933,10 @@ namespace internal { // Triangular = Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::assign_op::Scalar>, Dense2Triangular, Scalar> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.setZero(); dst._assignProduct(src, 1); @@ -945,10 +945,10 @@ struct Assignment, internal::assign_ // Triangular += Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::add_assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::add_assign_op::Scalar>, Dense2Triangular, Scalar> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { dst._assignProduct(src, 1); } @@ -956,10 +956,10 @@ struct Assignment, internal::add_ass // Triangular -= Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, 
internal::sub_assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::sub_assign_op::Scalar>, Dense2Triangular, Scalar> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { dst._assignProduct(src, -1); } diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 193891189..00a4a8c39 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -540,7 +540,7 @@ template class VectorwiseOp /** Returns the expression of the sum of the vector \a other to each subvector of \c *this */ template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC - CwiseBinaryOp, + CwiseBinaryOp, const ExpressionTypeNestedCleaned, const typename ExtendedType::Type> operator+(const DenseBase& other) const @@ -553,7 +553,7 @@ template class VectorwiseOp /** Returns the expression of the difference between each subvector of \c *this and the vector \a other */ template EIGEN_DEVICE_FUNC - CwiseBinaryOp, + CwiseBinaryOp, const ExpressionTypeNestedCleaned, const typename ExtendedType::Type> operator-(const DenseBase& other) const diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h index 51fef50e8..9b373c783 100644 --- a/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -18,20 +18,24 @@ namespace internal { * \brief Template functor for scalar/packet assignment * */ -template struct assign_op { +template struct assign_op { EIGEN_EMPTY_STRUCT_CTOR(assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,b); } + EIGEN_STRONG_INLINE 
void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,b); } }; -template -struct functor_traits > { + +// Empty overload for void type (used by PermutationMatrix +template struct assign_op {}; + +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost, - PacketAccess = packet_traits::Vectorizable + Cost = NumTraits::ReadCost, + PacketAccess = is_same::value && packet_traits::Vectorizable && packet_traits::Vectorizable }; }; @@ -39,20 +43,20 @@ struct functor_traits > { * \brief Template functor for scalar/packet assignment with addition * */ -template struct add_assign_op { +template struct add_assign_op { EIGEN_EMPTY_STRUCT_CTOR(add_assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost + NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = is_same::value && packet_traits::HasAdd }; }; @@ -60,20 +64,20 @@ struct functor_traits > { * \brief Template functor for scalar/packet assignment with subtraction * */ -template struct sub_assign_op { +template struct sub_assign_op { EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& 
b) const - { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost + NumTraits::AddCost, - PacketAccess = packet_traits::HasSub + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = is_same::value && packet_traits::HasSub }; }; @@ -98,7 +102,6 @@ struct functor_traits > { PacketAccess = is_same::value && packet_traits::HasMul }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; /** \internal * \brief Template functor for scalar/packet assignment with diviving @@ -120,7 +123,6 @@ struct functor_traits > { PacketAccess = is_same::value && packet_traits::HasDiv }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; /** \internal * \brief Template functor for scalar/packet assignment with swapping diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 6eb5b91ce..98fcebae5 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -21,22 +21,23 @@ namespace internal { * * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum() */ -template struct scalar_sum_op { -// typedef Scalar result_type; +template struct scalar_sum_op { + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::padd(a,b); } template - 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, // rough estimate! + PacketAccess = is_same::value && packet_traits::HasAdd && packet_traits::HasAdd + // TODO vectorize mixed sum }; }; @@ -45,7 +46,7 @@ struct functor_traits > { * This is required to solve Bug 426. * \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast() */ -template<> struct scalar_sum_op : scalar_sum_op { +template<> struct scalar_sum_op : scalar_sum_op { EIGEN_DEPRECATED scalar_sum_op() {} }; @@ -57,11 +58,7 @@ template<> struct scalar_sum_op : scalar_sum_op { * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() */ template struct scalar_product_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasMul && packet_traits::HasMul - }; - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } template @@ -75,7 +72,8 @@ template struct functor_traits > { enum { Cost = (NumTraits::MulCost + NumTraits::MulCost)/2, // rough estimate! 
- PacketAccess = scalar_product_op::Vectorizable + PacketAccess = is_same::value && packet_traits::HasMul && packet_traits::HasMul + // TODO vectorize mixed product }; }; @@ -90,7 +88,7 @@ template struct scalar_conj_product_op { Conj = NumTraits::IsComplex }; - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const @@ -269,18 +267,19 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::operator- */ -template struct scalar_difference_op { +template struct scalar_difference_op { + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::psub(a,b); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasSub + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = is_same::value && packet_traits::HasSub && packet_traits::HasSub }; }; @@ -290,11 +289,7 @@ struct functor_traits > { * \sa class CwiseBinaryOp, Cwise::operator/() */ template struct scalar_quotient_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv - }; - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } template @@ -305,7 +300,7 @@ template struct functor_traits > { typedef typename scalar_quotient_op::result_type result_type; enum { - PacketAccess = scalar_quotient_op::Vectorizable, + PacketAccess = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv, Cost = NumTraits::template Div::Cost }; }; @@ -446,7 +441,7 @@ struct functor_traits > template struct scalar_multiple2_op { - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } @@ -481,7 +476,7 @@ struct functor_traits > template struct scalar_quotient2_op { - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; } @@ -491,15 +486,6 @@ template struct functor_traits > { enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; -// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication -// where the mixing of different types is handled by scalar_product_traits -// In particular, real * complex is allowed. 
-// FIXME move this to functor_traits adding a functor_default -template struct functor_is_product_like { enum { ret = 0 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; - /** \internal * \brief Template functor to add a scalar to a fixed other one diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 253c03462..63a9fc462 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -363,7 +363,7 @@ class gebp_traits public: typedef _LhsScalar LhsScalar; typedef _RhsScalar RhsScalar; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, @@ -478,7 +478,7 @@ class gebp_traits, RealScalar, _ConjLhs, false> public: typedef std::complex LhsScalar; typedef RealScalar RhsScalar; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 7528fef24..b1465c3b5 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -25,7 +25,7 @@ struct general_matrix_matrix_product Traits; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, const LhsScalar* lhs, Index lhsStride, @@ -55,7 +55,7 @@ struct general_matrix_matrix_product Traits; -typedef typename scalar_product_traits::ReturnType ResScalar; +typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static void run(Index rows, Index cols, Index 
depth, const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 80ba89465..29d6dc721 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -40,7 +40,7 @@ template struct general_matrix_matrix_triangular_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha, level3_blocking& blocking) @@ -57,7 +57,7 @@ template struct general_matrix_matrix_triangular_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha, level3_blocking& blocking) diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index fc8886511..4a5cf3fb6 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -58,7 +58,7 @@ namespace internal { template struct general_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable @@ -334,7 +334,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product struct general_matrix_vector_product { -typedef typename scalar_product_traits::ReturnType ResScalar; +typedef typename ScalarBinaryOpTraits::ReturnType 
ResScalar; enum { Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index f79840aa7..c11a983c7 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -20,7 +20,7 @@ struct triangular_matrix_vector_product; template struct triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = ((Mode&Lower)==Lower), HasUnitDiag = (Mode & UnitDiag)==UnitDiag, @@ -91,7 +91,7 @@ EIGEN_DONT_INLINE void triangular_matrix_vector_product struct triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = ((Mode&Lower)==Lower), HasUnitDiag = (Mode & UnitDiag)==UnitDiag, diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 42e2e75b9..045e22658 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -131,6 +131,7 @@ template class ArrayWrapper; template class MatrixWrapper; template class SolverBase; template class InnerIterator; +template struct ScalarBinaryOpTraits; namespace internal { template struct kernel_retval_base; @@ -175,8 +176,8 @@ namespace internal { // with optional conjugation of the arguments. 
template struct conj_helper; -template struct scalar_sum_op; -template struct scalar_difference_op; +template struct scalar_sum_op; +template struct scalar_difference_op; template struct scalar_conj_product_op; template struct scalar_opposite_op; template struct scalar_conjugate_op; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index c9a0b9893..35547fdda 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -885,9 +885,9 @@ namespace Eigen { } // the expression type of a cwise product -#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \ +#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \ CwiseBinaryOp< \ - internal::scalar_product_op< \ + EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \ typename internal::traits::Scalar, \ typename internal::traits::Scalar \ >, \ diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 7ecd59add..af661c313 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -375,33 +375,6 @@ template struct scalar_product_traits enum { Defined = 0 }; }; -template struct scalar_product_traits -{ - enum { - // Cost = NumTraits::MulCost, - Defined = 1 - }; - typedef T ReturnType; -}; - -template struct scalar_product_traits > -{ - enum { - // Cost = 2*NumTraits::MulCost, - Defined = 1 - }; - typedef std::complex ReturnType; -}; - -template struct scalar_product_traits, T> -{ - enum { - // Cost = 2*NumTraits::MulCost, - Defined = 1 - }; - typedef std::complex ReturnType; -}; - // FIXME quick workaround around current limitation of result_of // template // struct result_of(ArgType0,ArgType1)> { @@ -434,6 +407,43 @@ T div_ceil(const T &a, const T &b) } // end namespace numext + +/** \class ScalarBinaryOpTraits + * \ingroup Core_Module + * + * \brief Determines whether the given binary operation of two numeric types is allowed and what the scalar return type is. 
+ * + * \sa CwiseBinaryOp + */ +template +struct ScalarBinaryOpTraits +#ifndef EIGEN_PARSED_BY_DOXYGEN + // for backward compatibility, use the hints given by the (deprecated) internal::scalar_product_traits class. + : internal::scalar_product_traits +#endif // EIGEN_PARSED_BY_DOXYGEN +{}; + +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef T ReturnType; +}; + +template +struct ScalarBinaryOpTraits,BinaryOp> +{ + enum { Defined = 1 }; + typedef std::complex ReturnType; +}; + +template +struct ScalarBinaryOpTraits, T,BinaryOp> +{ + enum { Defined = 1 }; + typedef std::complex ReturnType; +}; + } // end namespace Eigen #endif // EIGEN_META_H diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 3605de6fd..4fd4a9b0d 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -649,17 +649,13 @@ std::string demangle_flags(int f) } // end namespace internal -// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor -// that would take two operands of different types. If there were such an example, then this check should be -// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as -// currently they take only one typename Scalar template parameter. +// We require Lhs and Rhs to have "compatible" scalar types. // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. // So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to // add together a float matrix and a double matrix. +// Treat "void" as a special case. Needed for permutation products. TODO: this should be handled by ScalarBinaryOpTraits #define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT((internal::functor_is_product_like::ret \ - ? 
int(internal::scalar_product_traits::Defined) \ - : int(internal::is_same_or_void::value)), \ + EIGEN_STATIC_ASSERT(int(internal::is_same_or_void::value) || int(ScalarBinaryOpTraits::Defined), \ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) } // end namespace Eigen diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index 03f1a11f8..aeb043a6c 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -112,7 +112,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** \returns the center of the box */ inline const CwiseUnaryOp, - const CwiseBinaryOp, const VectorType, const VectorType> > + const CwiseBinaryOp, const VectorType, const VectorType> > center() const { return (m_min+m_max)/2; } @@ -120,7 +120,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * Note that this function does not get the same * result for integral or floating scalar types: see */ - inline const CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> sizes() const + inline const CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> sizes() const { return m_max - m_min; } /** \returns the volume of the bounding box */ @@ -131,7 +131,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * if the length of the diagonal is needed: diagonal().norm() * will provide it. 
*/ - inline CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> diagonal() const + inline CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> diagonal() const { return sizes(); } /** \returns the vertex of the bounding box at the corner defined by diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h index cd52b5470..1c35ca486 100644 --- a/Eigen/src/Geometry/Homogeneous.h +++ b/Eigen/src/Geometry/Homogeneous.h @@ -329,10 +329,10 @@ protected: // dense = homogeneous template< typename DstXprType, typename ArgType, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef Homogeneous SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.template topRows(src.nestedExpression().rows()) = src.nestedExpression(); dst.row(dst.rows()-1).setOnes(); @@ -341,10 +341,10 @@ struct Assignment, internal::assign_op // dense = homogeneous template< typename DstXprType, typename ArgType, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef Homogeneous SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.template leftCols(src.nestedExpression().cols()) = src.nestedExpression(); dst.col(dst.cols()-1).setOnes(); @@ -373,7 +373,7 @@ struct homogeneous_right_product_refactoring_helper typedef typename Rhs::ConstRowXpr ConstantColumn; typedef Replicate ConstantBlock; typedef Product LinearProduct; - typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; + typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; }; 
template @@ -414,7 +414,7 @@ struct homogeneous_left_product_refactoring_helper typedef typename Lhs::ConstColXpr ConstantColumn; typedef Replicate ConstantBlock; typedef Product LinearProduct; - typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; + typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; }; template diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h index a57f81764..3ce0a693d 100644 --- a/Eigen/src/Householder/HouseholderSequence.h +++ b/Eigen/src/Householder/HouseholderSequence.h @@ -108,7 +108,7 @@ struct hseq_side_dependent_impl template struct matrix_type_times_scalar_type { - typedef typename scalar_product_traits::ReturnType + typedef typename ScalarBinaryOpTraits::ReturnType ResultScalar; typedef Matrix Type; diff --git a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h index 35923be3d..7d67d3ce2 100644 --- a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +++ b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h @@ -91,10 +91,10 @@ protected: // Specialization for "dst = dec.solveWithGuess(rhs)" // NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef SolveWithGuess SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? 
dst = src.guess(); diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index c39f8e3d5..2d01b18c6 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -839,12 +839,12 @@ namespace internal { /***** Implementation of inverse() *****************************************************/ -template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { typedef FullPivLU LuType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h index e202a55cb..3134632e1 100644 --- a/Eigen/src/LU/InverseImpl.h +++ b/Eigen/src/LU/InverseImpl.h @@ -286,11 +286,11 @@ struct compute_inverse_and_det_with_check namespace internal { // Specialization for "dense = dense_xpr.inverse()" -template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment, internal::assign_op, Dense2Dense> { typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? 
const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime); diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index b68916287..ac2902261 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -525,12 +525,12 @@ MatrixType PartialPivLU::reconstructedMatrix() const namespace internal { /***** Implementation of inverse() *****************************************************/ -template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { typedef PartialPivLU LuType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 7c559f952..525ee8c18 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -598,11 +598,11 @@ void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType & namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense, Scalar> { typedef ColPivHouseholderQR QrType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h index 230d0d23c..52bcc2173 100644 --- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -510,11 +510,11 @@ void 
CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl( namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense, Scalar> { typedef CompleteOrthogonalDecomposition CodType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.rows())); } diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index 32a10f3fe..4f55d52a5 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -560,11 +560,11 @@ void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense, Scalar> { typedef FullPivHouseholderQR QrType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index 4a8dd12e4..b284fa9e4 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -34,8 +34,8 @@ template inline Derived& SparseMatrixBase::operator=(const SparseMatrixBase& other) { // by default sparse evaluation do not alias, so we can safely bypass the generic call_assignment routine - internal::Assignment > - ::run(derived(), other.derived(), internal::assign_op()); + internal::Assignment > + ::run(derived(), other.derived(), internal::assign_op()); return derived(); } @@ -127,7 +127,7 @@ void 
assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src) template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { assign_sparse_to_sparse(dst.derived(), src.derived()); } @@ -141,7 +141,7 @@ struct Assignment { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - if(internal::is_same >::value) + if(internal::is_same >::value) dst.setZero(); internal::evaluator srcEval(src); @@ -156,10 +156,10 @@ struct Assignment // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error template -struct Assignment, internal::assign_op, Sparse2Sparse, Scalar> +struct Assignment, internal::assign_op, Sparse2Sparse, Scalar> { typedef Solve SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec()._solve_impl(src.rhs(), dst); } @@ -176,7 +176,7 @@ struct Assignment typedef Array ArrayXI; typedef Array ArrayXS; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { Index size = src.diagonal().size(); dst.makeCompressed(); @@ -187,15 +187,15 @@ struct Assignment } template - static void run(SparseMatrixBase &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrixBase &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst.diagonal() = src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, 
const internal::add_assign_op &/*func*/) { dst.diagonal() += src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) { dst.diagonal() -= src.diagonal(); } }; } // end namespace internal diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index d422f3cbe..dd21eb8c5 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -579,7 +579,7 @@ template template Derived& SparseMatrixBase::operator+=(const DiagonalBase& other) { - call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); + call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -587,7 +587,7 @@ template template Derived& SparseMatrixBase::operator-=(const DiagonalBase& other) { - call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); + call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -600,31 +600,31 @@ SparseMatrixBase::cwiseProduct(const MatrixBase &other) c } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> operator+(const MatrixBase &a, const SparseMatrixBase &b) { - return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> operator+(const SparseMatrixBase &a, const MatrixBase &b) { - return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); + return 
CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> operator-(const MatrixBase &a, const SparseMatrixBase &b) { - return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> operator-(const SparseMatrixBase &a, const MatrixBase &b) { - return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 476796dd7..0547db596 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -74,7 +74,7 @@ struct sparse_time_dense_product_impl let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators // template -// struct scalar_product_traits > +// struct ScalarBinaryOpTraits > // { // enum { // Defined = 1 @@ -97,7 +97,7 @@ struct sparse_time_dense_product_impl::ReturnType rhs_j(alpha * rhs.coeff(j,c)); + typename ScalarBinaryOpTraits::ReturnType rhs_j(alpha * rhs.coeff(j,c)); for(LhsInnerIterator it(lhsEval,j); it ;++it) res.coeffRef(it.index(),c) += it.value() * rhs_j; } diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index a78bd57c3..531fea399 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -440,7 +440,7 @@ class SparseMatrix template void 
setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func); - void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } + void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } template void collapseDuplicates(DupFunctor dup_func = DupFunctor()); @@ -979,7 +979,7 @@ template template void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) { - internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); + internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); } /** The same as setFromTriplets but when duplicates are met the functor \a dup_func is applied: diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 24df36884..45f64e7f2 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -256,7 +256,7 @@ template class SparseMatrixBase Derived& operator/=(const Scalar& other); template struct CwiseProductDenseReturnType { - typedef CwiseBinaryOp::Scalar, typename internal::traits::Scalar >::ReturnType>, diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index b23003bb1..84e69903b 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -99,10 +99,10 @@ struct generic_product_impl -struct Assignment, internal::assign_op, Sparse2Dense> +struct Assignment, internal::assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { generic_product_impl::evalTo(dst,src.lhs(),src.rhs()); } @@ -110,10 +110,10 @@ struct Assignment, internal::assig // dense += sparse-product (can be sparse*sparse, sparse*perm, etc.) 
template< typename DstXprType, typename Lhs, typename Rhs> -struct Assignment, internal::add_assign_op, Sparse2Dense> +struct Assignment, internal::add_assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { generic_product_impl::addTo(dst,src.lhs(),src.rhs()); } @@ -121,10 +121,10 @@ struct Assignment, internal::add_a // dense -= sparse-product (can be sparse*sparse, sparse*perm, etc.) template< typename DstXprType, typename Lhs, typename Rhs> -struct Assignment, internal::sub_assign_op, Sparse2Dense> +struct Assignment, internal::sub_assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { generic_product_impl::subTo(dst,src.lhs(),src.rhs()); } diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index b92bb17e2..4f0c84d88 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -223,13 +223,13 @@ struct Assignment - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { internal::permute_symm_to_fullsymm(src.matrix(), dst); } template - static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const internal::assign_op &/*func*/) { // TODO directly evaluate into dst; SparseMatrix tmp(dst.rows(),dst.cols()); @@ -586,12 +586,12 @@ class SparseSymmetricPermutationProduct namespace internal { template -struct Assignment, 
internal::assign_op, Sparse2Sparse> +struct Assignment, internal::assign_op, Sparse2Sparse> { typedef SparseSymmetricPermutationProduct SrcXprType; typedef typename DstXprType::StorageIndex DstIndex; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) { // internal::permute_symm_to_fullsymm(m_matrix,_dest,m_perm.indices().data()); SparseMatrix tmp; @@ -600,7 +600,7 @@ struct Assignment } template - static void run(SparseSelfAdjointView& dst, const SrcXprType &src, const internal::assign_op &) + static void run(SparseSelfAdjointView& dst, const SrcXprType &src, const internal::assign_op &) { internal::permute_symm_to_symm(src.matrix(),dst.matrix(),src.perm().indices().data()); } diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index acd7f7e10..2d4498b03 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -705,12 +705,12 @@ struct evaluator_traits > }; template< typename DstXprType, typename SparseQRType> -struct Assignment, internal::assign_op, Sparse2Sparse> +struct Assignment, internal::assign_op, Sparse2Sparse> { typedef SparseQRMatrixQReturnType SrcXprType; typedef typename DstXprType::Scalar Scalar; typedef typename DstXprType::StorageIndex StorageIndex; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows()); idMat.setIdentity(); @@ -721,12 +721,12 @@ struct Assignment, internal: }; template< typename DstXprType, typename SparseQRType> -struct Assignment, internal::assign_op, Sparse2Dense> +struct Assignment, internal::assign_op, Sparse2Dense> { typedef SparseQRMatrixQReturnType SrcXprType; typedef typename DstXprType::Scalar Scalar; typedef typename 
DstXprType::StorageIndex StorageIndex; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows()); } diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index c3f8c2575..c6ea12c90 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -4,10 +4,10 @@ */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived) +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product) operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)(derived(), other.derived()); + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived()); } /** \returns an expression of the coefficient wise quotient of \c *this and \a other diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index a8fa287c9..3c13b7f06 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2016 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -16,7 +16,13 @@ * * \sa class CwiseBinaryOp, operator-=() */ -EIGEN_MAKE_CWISE_BINARY_OP(operator-,internal::scalar_difference_op) +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference) +operator-(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const +{ + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference)(derived(), other.derived()); +} /** \returns an expression of the sum of \c *this and \a other * @@ -24,7 +30,13 @@ EIGEN_MAKE_CWISE_BINARY_OP(operator-,internal::scalar_difference_op) * * \sa class CwiseBinaryOp, operator+=() */ -EIGEN_MAKE_CWISE_BINARY_OP(operator+,internal::scalar_sum_op) +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum) +operator+(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const +{ + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum)(derived(), other.derived()); +} /** \returns an expression of a custom coefficient-wise operator \a func of *this and \a other * diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h index 67ec601b9..927167aff 100644 --- a/Eigen/src/plugins/CommonCwiseUnaryOps.h +++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h @@ -73,7 +73,7 @@ operator/(const Scalar& scalar) const /** Overloaded for efficiently multipling with compatible scalar types */ template EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, +typename internal::enable_if::Defined, const CwiseUnaryOp, const Derived> >::type operator*(const T& scalar) const { @@ -91,7 +91,7 @@ operator*(const Scalar& scalar, const StorageBaseType& matrix) template EIGEN_DEVICE_FUNC inline friend -typename internal::enable_if::Defined, +typename 
internal::enable_if::Defined, const CwiseUnaryOp, const Derived> >::type operator*(const T& scalar, const StorageBaseType& matrix) { @@ -104,7 +104,7 @@ operator*(const T& scalar, const StorageBaseType& matrix) template EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, +typename internal::enable_if::Defined, const CwiseUnaryOp, const Derived> >::type operator/(const T& scalar) const { diff --git a/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/Eigen/src/plugins/MatrixCwiseBinaryOps.h index 6dd2e1192..59581e618 100644 --- a/Eigen/src/plugins/MatrixCwiseBinaryOps.h +++ b/Eigen/src/plugins/MatrixCwiseBinaryOps.h @@ -19,10 +19,10 @@ */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived) +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product) cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)(derived(), other.derived()); + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived()); } /** \returns an expression of the coefficient-wise == operator of *this and \a other diff --git a/blas/PackedTriangularMatrixVector.h b/blas/PackedTriangularMatrixVector.h index e9886d56f..0039536a8 100644 --- a/blas/PackedTriangularMatrixVector.h +++ b/blas/PackedTriangularMatrixVector.h @@ -18,7 +18,7 @@ struct packed_triangular_matrix_vector_product; template struct packed_triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = (Mode & Lower) ==Lower, HasUnitDiag = (Mode & UnitDiag)==UnitDiag, @@ -47,7 +47,7 @@ struct packed_triangular_matrix_vector_product struct packed_triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = (Mode & 
Lower) ==Lower, HasUnitDiag = (Mode & UnitDiag)==UnitDiag, diff --git a/test/array.cpp b/test/array.cpp index 4cd4f262b..bd470d5f7 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -72,7 +72,7 @@ template void array(const ArrayType& m) VERIFY_IS_MUCH_SMALLER_THAN(abs(m1.rowwise().sum().sum() - m1.sum()), m1.abs().sum()); if (!internal::isMuchSmallerThan(abs(m1.sum() - (m1+m2).sum()), m1.abs().sum(), test_precision())) VERIFY_IS_NOT_APPROX(((m1+m2).rowwise().sum()).sum(), m1.sum()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); // vector-wise ops m3 = m1; diff --git a/test/array_for_matrix.cpp b/test/array_for_matrix.cpp index 75e6a778f..97e03be83 100644 --- a/test/array_for_matrix.cpp +++ b/test/array_for_matrix.cpp @@ -45,7 +45,7 @@ template void array_for_matrix(const MatrixType& m) VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum().sum() - m1.sum(), m1.squaredNorm()); VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum() + m2.colwise().sum() - (m1+m2).colwise().sum(), (m1+m2).squaredNorm()); VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum() - m2.rowwise().sum() - (m1-m2).rowwise().sum(), (m1-m2).squaredNorm()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); // vector-wise ops m3 = m1; diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index dbcf468ea..66d9b777a 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -42,6 +42,7 @@ template void mixingtypes(int size = SizeAtCompileType) Mat_f mf = Mat_f::Random(size,size); Mat_d md = mf.template cast(); + //Mat_d rd = md; Mat_cf mcf = Mat_cf::Random(size,size); Mat_cd mcd = mcf.template cast >(); Mat_cd rcd = mcd; @@ -56,16 +57,12 @@ template void mixingtypes(int size = SizeAtCompileType) mf+mf; - VERIFY_RAISES_ASSERT(mf+md); -#if !EIGEN_HAS_STD_RESULT_OF - // 
this one does not even compile with C++11 - VERIFY_RAISES_ASSERT(mf+mcf); -#endif + +// VERIFY_RAISES_ASSERT(mf+md); // does not even compile #ifdef EIGEN_DONT_VECTORIZE VERIFY_RAISES_ASSERT(vf=vd); VERIFY_RAISES_ASSERT(vf+=vd); - VERIFY_RAISES_ASSERT(mcd=md); #endif // check scalar products @@ -186,16 +183,35 @@ template void mixingtypes(int size = SizeAtCompileType) Mat_cd((scd * md.template cast().eval() * mcd).template triangularView())); - VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast().eval().array() * mcd.array() ); - VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast().eval().array() * mcd.array() ); + VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() + mcd.array(), md.template cast().eval().array() + mcd.array() ); + VERIFY_IS_APPROX( mcd.array() + md.array(), mcd.array() + md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() - mcd.array(), md.template cast().eval().array() - mcd.array() ); + VERIFY_IS_APPROX( mcd.array() - md.array(), mcd.array() - md.template cast().eval().array() ); // VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast().eval().array() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd = md, md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd += md, mcd + md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd -= md, mcd - md.template cast().eval() ); rcd = mcd; VERIFY_IS_APPROX( rcd.array() *= md.array(), mcd.array() * md.template cast().eval().array() ); rcd = mcd; VERIFY_IS_APPROX( rcd.array() /= md.array(), mcd.array() / md.template cast().eval().array() ); + + rcd = mcd; + VERIFY_IS_APPROX( rcd += md + mcd*md, mcd + (md.template cast().eval()) + mcd*(md.template cast().eval())); + + rcd 
= mcd; + VERIFY_IS_APPROX( rcd += mcd + md*md, mcd + mcd + ((md*md).template cast().eval()) ); } void test_mixingtypes() diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 24a7641ff..b7c2df64b 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -29,7 +29,7 @@ using internal::demangle_unrolling; template bool test_assign(const Dst&, const Src&, int traversal, int unrolling) { - typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; + typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; bool res = traits::Traversal==traversal; if(unrolling==InnerUnrolling+CompleteUnrolling) res = res && (int(traits::Unrolling)==InnerUnrolling || int(traits::Unrolling)==CompleteUnrolling); @@ -53,7 +53,7 @@ bool test_assign(const Dst&, const Src&, int traversal, int unrolling) template bool test_assign(int traversal, int unrolling) { - typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; + typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; bool res = traits::Traversal==traversal && traits::Unrolling==unrolling; if(!res) { @@ -73,7 +73,8 @@ bool test_assign(int traversal, int unrolling) template bool test_redux(const Xpr&, int traversal, int unrolling) { - typedef internal::redux_traits,internal::redux_evaluator > traits; + typedef typename Xpr::Scalar Scalar; + typedef internal::redux_traits,internal::redux_evaluator > traits; bool res = traits::Traversal==traversal && traits::Unrolling==unrolling; if(!res) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 089042751..ba61288a3 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -501,36 +501,36 @@ struct make_coherent_impl -struct scalar_product_traits,A_Scalar> -{ - enum { 
Defined = 1 }; - typedef Matrix ReturnType; -}; - -template -struct scalar_product_traits > -{ - enum { Defined = 1 }; - typedef Matrix ReturnType; -}; - -template -struct scalar_product_traits,typename DerType::Scalar> -{ - enum { Defined = 1 }; - typedef AutoDiffScalar ReturnType; -}; - -template -struct scalar_product_traits > -{ - enum { Defined = 1 }; - typedef AutoDiffScalar ReturnType; -}; - } // end namespace internal +template +struct ScalarBinaryOpTraits,A_Scalar> +{ + enum { Defined = 1 }; + typedef Matrix ReturnType; +}; + +template +struct ScalarBinaryOpTraits > +{ + enum { Defined = 1 }; + typedef Matrix ReturnType; +}; + +template +struct ScalarBinaryOpTraits,typename DerType::Scalar> +{ + enum { Defined = 1 }; + typedef AutoDiffScalar ReturnType; +}; + +template +struct ScalarBinaryOpTraits > +{ + enum { Defined = 1 }; + typedef AutoDiffScalar ReturnType; +}; + #define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \ template \ inline const Eigen::AutoDiffScalar::type>::Scalar>, const typename Eigen::internal::remove_all::type> > \ diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h index bf9727c21..582fa8512 100644 --- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h @@ -203,7 +203,7 @@ struct traits > { typedef typename remove_all<_Lhs>::type Lhs; typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; typedef typename promote_index_type::type StorageIndex; enum { @@ -222,7 +222,7 @@ struct traits > typedef MatrixXpr XprKind; typedef typename remove_all<_Lhs>::type Lhs; typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; typedef typename 
cwise_promote_storage_type::StorageKind, typename traits::StorageKind, scalar_product_op >::ret StorageKind; typedef typename promote_index_type::type StorageIndex; From 9137f560f0c84470c7859a6db704bf5f18ae999d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jun 2016 07:26:48 -0700 Subject: [PATCH 24/86] Moved assertions to the constructor to make the code more portable --- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 14 ++++++++++++++ unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h | 16 ---------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 4e873011e..a48cb1daa 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -426,6 +426,20 @@ struct TensorEvaluator(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h index 9509f8002..5f2e329f2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -227,22 +227,6 @@ struct traits::type 
Scalar; - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageKind, - typename traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageKind, - typename traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::Index, - typename traits::Index>::value), - STORAGE_INDEX_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::Index, - typename traits::Index>::value), - STORAGE_INDEX_MUST_MATCH) typedef traits XprTraits; typedef typename traits::StorageKind StorageKind; typedef typename traits::Index Index; From df24f4a01d762b0ae7dee8a1a0c769c96e4da835 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 6 Jun 2016 16:46:46 +0200 Subject: [PATCH 25/86] bug #1201: improve code generation of affine*vec with MSVC --- Eigen/src/Geometry/Transform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 4fc876bcf..1a06c1e35 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -1367,7 +1367,7 @@ struct transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES); Matrix rhs; - rhs << other,1; + rhs.template head() = other; rhs[Dim] = 1; Matrix res(T.matrix() * rhs); return res.template head(); } From 33f0340188eb309dfa3b16c0758a878c3ea114d9 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jun 2016 12:06:42 -0700 Subject: [PATCH 26/86] Implement result_of for the new ternary functors --- Eigen/src/Core/util/Meta.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 7ecd59add..bd3a0aa5d 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -328,6 +328,30 @@ struct result_of { enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; typedef typename 
binary_result_of_select::type type; }; + +template +struct ternary_result_of_select {typedef typename internal::remove_all::type type;}; + +template +struct ternary_result_of_select +{typedef typename Func::result_type type;}; + +template +struct ternary_result_of_select +{typedef typename Func::template result::type type;}; + +template +struct result_of { + template + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + template + static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); + static has_none testFunctor(...); + + // note that the following indirection is needed for gcc-3.3 + enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; + typedef typename ternary_result_of_select::type type; +}; #endif /** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer. From ea75dba2014ffa58acfcd160b5e59975c453f8da Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jun 2016 13:32:28 -0700 Subject: [PATCH 27/86] Added missing EIGEN_DEVICE_FUNC qualifiers to the unary array ops --- Eigen/src/plugins/ArrayCwiseUnaryOps.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 775fa6ee0..9e35dc571 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -248,6 +248,7 @@ tan() const * * \sa tan(), asin(), acos() */ +EIGEN_DEVICE_FUNC inline const AtanReturnType atan() const { @@ -289,6 +290,7 @@ asin() const * * \sa tan(), sinh(), cosh() */ +EIGEN_DEVICE_FUNC inline const TanhReturnType tanh() const { @@ -302,6 +304,7 @@ tanh() const * * \sa sin(), tanh(), cosh() */ +EIGEN_DEVICE_FUNC inline const SinhReturnType sinh() const { @@ -315,6 +318,7 @@ sinh() const * * \sa tan(), sinh(), cosh() */ +EIGEN_DEVICE_FUNC inline const CoshReturnType cosh() const { @@ -332,6 +336,7 @@ cosh() const * * \sa digamma() */ +EIGEN_DEVICE_FUNC inline const 
LgammaReturnType lgamma() const { @@ -346,6 +351,7 @@ lgamma() const * * \sa Eigen::digamma(), Eigen::polygamma(), lgamma() */ +EIGEN_DEVICE_FUNC inline const DigammaReturnType digamma() const { @@ -364,6 +370,7 @@ digamma() const * * \sa erfc() */ +EIGEN_DEVICE_FUNC inline const ErfReturnType erf() const { @@ -382,6 +389,7 @@ erf() const * * \sa erf() */ +EIGEN_DEVICE_FUNC inline const ErfcReturnType erfc() const { @@ -455,6 +463,7 @@ cube() const * * \sa ceil(), floor() */ +EIGEN_DEVICE_FUNC inline const RoundReturnType round() const { @@ -468,6 +477,7 @@ round() const * * \sa ceil(), round() */ +EIGEN_DEVICE_FUNC inline const FloorReturnType floor() const { @@ -481,6 +491,7 @@ floor() const * * \sa floor(), round() */ +EIGEN_DEVICE_FUNC inline const CeilReturnType ceil() const { @@ -494,6 +505,7 @@ ceil() const * * \sa isfinite(), isinf() */ +EIGEN_DEVICE_FUNC inline const IsNaNReturnType isNaN() const { @@ -507,6 +519,7 @@ isNaN() const * * \sa isnan(), isfinite() */ +EIGEN_DEVICE_FUNC inline const IsInfReturnType isInf() const { @@ -520,6 +533,7 @@ isInf() const * * \sa isnan(), isinf() */ +EIGEN_DEVICE_FUNC inline const IsFiniteReturnType isFinite() const { From 7ef9f47b5874c33d15649a3312d463ecbd290365 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jun 2016 14:09:46 -0700 Subject: [PATCH 28/86] Misc small improvements to the reduction code. 
--- .../CXX11/src/Tensor/TensorReductionCuda.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 0d1a098b7..e82530955 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -130,15 +130,17 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num if (block == 0) { // We're the first block to run, initialize the output value atomicExch(output, reducer.initialize()); - unsigned int old = atomicExch(semaphore, 2u); - assert(old == 1u); + __threadfence(); + atomicExch(semaphore, 2u); } else { + // Wait for the first block to initialize the output value. // Use atomicCAS here to ensure that the reads aren't cached - unsigned int val = atomicCAS(semaphore, 2u, 2u); - while (val < 2u) { + unsigned int val; + do { val = atomicCAS(semaphore, 2u, 2u); } + while (val < 2u); } } } @@ -166,12 +168,8 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num } if (gridDim.x > 1 && threadIdx.x == 0) { - unsigned int ticket = atomicInc(semaphore, UINT_MAX); - assert(ticket >= 2u); - if (ticket == gridDim.x + 1) { - // We're the last block, reset the semaphore - *semaphore = 0; - } + // Let the last block reset the semaphore + atomicInc(semaphore, gridDim.x + 1); } } From 3d71d3918e750be81739f24cdc0687648fb7f5c5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 6 Jun 2016 23:10:55 +0200 Subject: [PATCH 29/86] Disable shortcuts for res ?= prod when the scalar types do not match exactly. 
--- Eigen/src/Core/ProductEvaluators.h | 15 +++++++++------ test/mixingtypes.cpp | 13 +++++++++++-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 7f041e5dd..71ae6e54c 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -122,14 +122,17 @@ protected: PlainObject m_result; }; +// The following three shortcuts are enabled only if the scalar types match excatly. +// TODO: we could enable them for different scalar types when the product is not vectorized. + // Dense = Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::assign_op::Scalar>, Dense2Dense, +struct Assignment, internal::assign_op, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); @@ -138,12 +141,12 @@ struct Assignment, internal::assign_op -struct Assignment, internal::add_assign_op::Scalar>, Dense2Dense, +struct Assignment, internal::add_assign_op, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { // FIXME shall we handle nested_eval here? 
generic_product_impl::addTo(dst, src.lhs(), src.rhs()); @@ -152,12 +155,12 @@ struct Assignment, internal::add_assign_op< // Dense -= Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::sub_assign_op::Scalar>, Dense2Dense, +struct Assignment, internal::sub_assign_op, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::subTo(dst, src.lhs(), src.rhs()); diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index 66d9b777a..2af188b25 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -208,10 +208,19 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX( rcd.array() /= md.array(), mcd.array() / md.template cast().eval().array() ); rcd = mcd; - VERIFY_IS_APPROX( rcd += md + mcd*md, mcd + (md.template cast().eval()) + mcd*(md.template cast().eval())); + VERIFY_IS_APPROX( rcd.noalias() += md + mcd*md, mcd + (md.template cast().eval()) + mcd*(md.template cast().eval())); + VERIFY_IS_APPROX( rcd.noalias() = md*md, ((md*md).eval().template cast()) ); rcd = mcd; - VERIFY_IS_APPROX( rcd += mcd + md*md, mcd + mcd + ((md*md).template cast().eval()) ); + VERIFY_IS_APPROX( rcd.noalias() += md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= md*md, mcd - ((md*md).eval().template cast()) ); + + VERIFY_IS_APPROX( rcd.noalias() = mcd + md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += mcd + md*md, mcd + mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= mcd + md*md, - ((md*md).eval().template cast()) ); 
} void test_mixingtypes() From 2c462f4201365d1ac4872245e81066746f09ac47 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 6 Jun 2016 23:11:38 +0200 Subject: [PATCH 30/86] Clean handling for void type in EIGEN_CHECK_BINARY_COMPATIBILIY --- Eigen/src/Core/util/Meta.h | 24 ++++++++++++++++++++++++ Eigen/src/Core/util/XprHelper.h | 8 +------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index af661c313..efb9961ce 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -430,6 +430,30 @@ struct ScalarBinaryOpTraits typedef T ReturnType; }; +// For Matrix * Permutation +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef T ReturnType; +}; + +// For Permutation * Matrix +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef T ReturnType; +}; + +// for Permutation*Permutation +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef void ReturnType; +}; + template struct ScalarBinaryOpTraits,BinaryOp> { diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 4fd4a9b0d..828813161 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -610,11 +610,6 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if struct is_same_or_void { enum { value = is_same::value }; }; -template struct is_same_or_void { enum { value = 1 }; }; -template struct is_same_or_void { enum { value = 1 }; }; -template<> struct is_same_or_void { enum { value = 1 }; }; - #ifdef EIGEN_DEBUG_ASSIGN std::string demangle_traversal(int t) { @@ -653,9 +648,8 @@ std::string demangle_flags(int f) // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. // So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to // add together a float matrix and a double matrix. 
-// Treat "void" as a special case. Needed for permutation products. TODO: this should be handled by ScalarBinaryOpTraits #define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT(int(internal::is_same_or_void::value) || int(ScalarBinaryOpTraits::Defined), \ + EIGEN_STATIC_ASSERT(int(ScalarBinaryOpTraits::Defined), \ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) } // end namespace Eigen From 84b2060a9e3e57e49bad6208873e52c327ad135b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jun 2016 17:16:19 -0700 Subject: [PATCH 31/86] Fixed compilation error with gcc 4.4 --- unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 031dbf6f2..61df8032d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -106,7 +106,7 @@ struct TensorEvaluator, Device> { m_output(NULL) { // Accumulating a scalar isn't supported. 
- EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); eigen_assert(m_axis >= 0 && m_axis < NumDims); // Compute stride of scan axis @@ -136,7 +136,7 @@ struct TensorEvaluator, Device> { return true; } } - + template EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { return internal::ploadt(m_output + index); From db0118342c2480ba0da7e8e4fbdd6aebaac687a4 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Tue, 7 Jun 2016 19:17:18 +0200 Subject: [PATCH 32/86] Fixed compilation of BVH_Example (required for make doc) --- unsupported/doc/examples/BVH_Example.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/unsupported/doc/examples/BVH_Example.cpp b/unsupported/doc/examples/BVH_Example.cpp index 6b6fac075..4315fb440 100644 --- a/unsupported/doc/examples/BVH_Example.cpp +++ b/unsupported/doc/examples/BVH_Example.cpp @@ -6,9 +6,7 @@ using namespace Eigen; typedef AlignedBox Box2d; namespace Eigen { - namespace internal { Box2d bounding_box(const Vector2d &v) { return Box2d(v, v); } //compute the bounding box of a single point - } } struct PointPointMinimizer //how to compute squared distances between points and rectangles From 86aedc9282f85966461544c40403979b861d3e19 Mon Sep 17 00:00:00 2001 From: Igor Babuschkin Date: Tue, 7 Jun 2016 20:06:38 +0100 Subject: [PATCH 33/86] Add small fixes to TensorScanOp --- unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 61df8032d..66fcacd8e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -81,7 +81,7 @@ struct TensorEvaluator, Device> { typedef typename XprType::Index Index; static const int NumDims = internal::array_size::Dimensions>::value; typedef DSizes Dimensions; - typedef typename 
XprType::Scalar Scalar; + typedef typename internal::remove_const::type Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; @@ -152,6 +152,10 @@ struct TensorEvaluator, Device> { return m_output[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { if (m_output != NULL) { m_device.deallocate(m_output); From 002804938087acc829941c7500a5230fa5cf28b0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 23:08:11 +0200 Subject: [PATCH 34/86] bug #1240: Remove any assumption on NEON vector types. --- Eigen/src/Core/arch/NEON/Complex.h | 3 ++- Eigen/src/Core/arch/NEON/PacketMath.h | 11 ++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 234f29b80..ccc00e5a6 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -275,7 +275,8 @@ ptranspose(PacketBlock& kernel) { //---------- double ---------- #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG -static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000); +const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 }; +static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA ); struct Packet1cd { diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index fa16bc9c8..e1247696d 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -49,14 +49,6 @@ typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1(X) -#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG - //Special treatment for Apple's llvm-gcc, its NEON packet types are unions - #define EIGEN_INIT_NEON_PACKET2D(X, Y) {{X, Y}} -#else - //Default 
initializer for packets - #define EIGEN_INIT_NEON_PACKET2D(X, Y) {X, Y} -#endif - // arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function // which available on LLVM and GCC (at least) #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC @@ -584,7 +576,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { r template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { - Packet2d countdown = EIGEN_INIT_NEON_PACKET2D(0, 1); + const double countdown_raw[] = {0.0,1.0}; + const Packet2d countdown = vld1q_f64(countdown_raw); return vaddq_f64(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); } From 2e238bafb69ab0ee2ab2e682d5ac1a43376f9496 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 10 Jun 2016 15:05:43 +0200 Subject: [PATCH 35/86] Bug 279: enable mixing types for comparisons, min, and max. --- Eigen/src/Core/Redux.h | 4 +- Eigen/src/Core/functors/BinaryFunctors.h | 74 ++++++++++--------- Eigen/src/Core/util/ForwardDeclarations.h | 7 +- Eigen/src/Core/util/Macros.h | 18 ++--- .../src/Eigenvalues/GeneralizedEigenSolver.h | 32 ++++++-- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 20 ++--- Eigen/src/plugins/CommonCwiseBinaryOps.h | 30 ++++---- Eigen/src/plugins/MatrixCwiseBinaryOps.h | 16 ++-- 8 files changed, 113 insertions(+), 88 deletions(-) diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index ec969d9b9..b6e8f8887 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -425,7 +425,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { - return derived().redux(Eigen::internal::scalar_min_op()); + return derived().redux(Eigen::internal::scalar_min_op()); } /** \returns the maximum of all coefficients of \c *this. 
@@ -435,7 +435,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { - return derived().redux(Eigen::internal::scalar_max_op()); + return derived().redux(Eigen::internal::scalar_max_op()); } /** \returns the sum of all coefficients of \c *this diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 98fcebae5..c59147db4 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -111,21 +111,22 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() */ -template struct scalar_min_op { +template struct scalar_min_op { + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmin(a,b); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux_min(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMin + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = internal::is_same::value && packet_traits::HasMin }; }; @@ -134,21 +135,22 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() */ -template struct scalar_max_op { +template struct scalar_max_op { + typedef typename 
ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmax(a,b); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux_max(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMax + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = internal::is_same::value && packet_traits::HasMax }; }; @@ -156,56 +158,56 @@ struct functor_traits > { * \brief Template functors for comparison of two scalars * \todo Implement packet-comparisons */ -template struct scalar_cmp_op; +template struct scalar_cmp_op; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, PacketAccess = false }; }; -template -struct result_of(Scalar,Scalar)> { +template +struct result_of(LhsScalar,RhsScalar)> { typedef bool type; }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;} }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; 
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;} }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;} }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;} }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);} }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;} }; @@ -214,7 +216,7 @@ template 
struct scalar_cmp_op { * * \sa MatrixBase::stableNorm(), class Redux */ -template struct scalar_hypot_op { +template struct scalar_hypot_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) // typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const @@ -235,7 +237,7 @@ template struct scalar_hypot_op { } }; template -struct functor_traits > { +struct functor_traits > { enum { Cost = 3 * NumTraits::AddCost + diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 045e22658..340d1f3a5 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -179,6 +179,8 @@ template struct scalar_sum_op; template struct scalar_difference_op; template struct scalar_conj_product_op; +template struct scalar_min_op; +template struct scalar_max_op; template struct scalar_opposite_op; template struct scalar_conjugate_op; template struct scalar_real_op; @@ -201,8 +203,6 @@ template struct scalar_cube_op; template struct scalar_cast_op; template struct scalar_multiple_op; template struct scalar_quotient1_op; -template struct scalar_min_op; -template struct scalar_max_op; template struct scalar_random_op; template struct scalar_add_op; template struct scalar_constant_op; @@ -212,9 +212,10 @@ template struct scalar_igamma_op; template struct scalar_igammac_op; template struct scalar_betainc_op; +template struct scalar_hypot_op; template struct scalar_product_op; -template struct scalar_multiple2_op; template struct scalar_quotient_op; +template struct scalar_multiple2_op; template struct scalar_quotient2_op; } // end namespace internal diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 35547fdda..a51572463 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -876,15 +876,7 @@ namespace Eigen { #define EIGEN_IMPLIES(a,b) (!(a) || (b)) -#define 
EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \ - template \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ - (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ - { \ - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ - } - -// the expression type of a cwise product +// the expression type of a standard coefficient wise binary operation #define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \ CwiseBinaryOp< \ EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \ @@ -895,6 +887,14 @@ namespace Eigen { const RHS \ > +#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \ + (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ + { \ + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \ + } + #ifdef EIGEN_EXCEPTIONS # define EIGEN_THROW_X(X) throw X # define EIGEN_THROW throw diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index a9d6790d5..07a9ccf46 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -327,13 +327,33 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp } else { - Scalar p = Scalar(0.5) * (m_matS.coeff(i, i) - m_matS.coeff(i+1, i+1)); - Scalar z = sqrt(abs(p * p + m_matS.coeff(i+1, i) * m_matS.coeff(i, i+1))); - m_alphas.coeffRef(i) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, z); - m_alphas.coeffRef(i+1) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, -z); + // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a triangular 2x2 block T + // From the eigen decomposition of T = U * E * U^-1, + // we can extract the eigenvalues of (U^-1 * S * U) / E + // Here, we can take advantage that E = diag(T), and U = [ 1 
T_01 ; 0 T_11-T_00], and U^-1 = [1 -T_11/(T_11-T_00) ; 0 1/(T_11-T_00)]. + // Then taking beta=T_00*T_11*(T_11-T_00), we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00) * (T_11-T_00): + + // T = [a b ; 0 c] + // S = [e f ; g h] + RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i, i+1), c = m_realQZ.matrixT().coeff(i+1, i+1); + RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1); + RealScalar d = c-a; + RealScalar gb = g*b; + Matrix A; + A << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, + g*c , (gb+h*d)*a; + + // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal, + // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00): + + Scalar p = Scalar(0.5) * (A.coeff(i, i) - A.coeff(i+1, i+1)); + Scalar z = sqrt(abs(p * p + A.coeff(i+1, i) * A.coeff(i, i+1))); + m_alphas.coeffRef(i) = ComplexScalar(A.coeff(i+1, i+1) + p, z); + m_alphas.coeffRef(i+1) = ComplexScalar(A.coeff(i+1, i+1) + p, -z); + + m_betas.coeffRef(i) = + m_betas.coeffRef(i+1) = a*c*d; - m_betas.coeffRef(i) = m_realQZ.matrixT().coeff(i,i); - m_betas.coeffRef(i+1) = m_realQZ.matrixT().coeff(i,i); i += 2; } } diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index c6ea12c90..da47a7ed7 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -29,14 +29,14 @@ operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * * \sa max() */ -EIGEN_MAKE_CWISE_BINARY_OP(min,internal::scalar_min_op) +EIGEN_MAKE_CWISE_BINARY_OP(min,min) /** \returns an expression of the coefficient-wise min of \c *this and scalar \a other * * \sa max() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, 
const CwiseNullaryOp, PlainObject> > #ifdef EIGEN_PARSED_BY_DOXYGEN min @@ -55,14 +55,14 @@ min * * \sa min() */ -EIGEN_MAKE_CWISE_BINARY_OP(max,internal::scalar_max_op) +EIGEN_MAKE_CWISE_BINARY_OP(max,max) /** \returns an expression of the coefficient-wise max of \c *this and scalar \a other * * \sa min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > #ifdef EIGEN_PARSED_BY_DOXYGEN max @@ -95,13 +95,13 @@ pow(const ArrayBase& exponents) const // TODO code generating macros could be moved to Macros.h and could include generation of documentation #define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \ template \ -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ { \ - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ }\ -typedef CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \ -typedef CwiseBinaryOp, const CwiseNullaryOp, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \ +typedef CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \ +typedef CwiseBinaryOp, const CwiseNullaryOp, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \ OP(const Scalar& s) const { \ return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \ @@ -113,10 +113,10 @@ OP(const Scalar& s, const Derived& d) { \ #define EIGEN_MAKE_CWISE_COMP_R_OP(OP, R_OP, RCOMPARATOR) \ template \ -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, 
const OtherDerived, const Derived> \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const OtherDerived, const Derived> \ OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ { \ - return CwiseBinaryOp, const OtherDerived, const Derived>(other.derived(), derived()); \ + return CwiseBinaryOp, const OtherDerived, const Derived>(other.derived(), derived()); \ } \ EIGEN_DEVICE_FUNC \ inline const RCmp ## RCOMPARATOR ## ReturnType \ diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index 3c13b7f06..6a5ccc1aa 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -16,13 +16,14 @@ * * \sa class CwiseBinaryOp, operator-=() */ -template -EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference) -operator-(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const -{ - return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference)(derived(), other.derived()); -} +EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference) +// template +// EIGEN_DEVICE_FUNC +// EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference) +// operator-(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const +// { +// return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference)(derived(), other.derived()); +// } /** \returns an expression of the sum of \c *this and \a other * @@ -30,13 +31,14 @@ operator-(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * * \sa class CwiseBinaryOp, operator+=() */ -template -EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum) -operator+(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const -{ - return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum)(derived(), other.derived()); -} +EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum) +// template +// EIGEN_DEVICE_FUNC +// EIGEN_STRONG_INLINE const 
EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum) +// operator+(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const +// { +// return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum)(derived(), other.derived()); +// } /** \returns an expression of a custom coefficient-wise operator \a func of *this and \a other * diff --git a/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/Eigen/src/plugins/MatrixCwiseBinaryOps.h index 59581e618..f1084abef 100644 --- a/Eigen/src/plugins/MatrixCwiseBinaryOps.h +++ b/Eigen/src/plugins/MatrixCwiseBinaryOps.h @@ -74,10 +74,10 @@ cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise min of *this and scalar \a other @@ -85,7 +85,7 @@ cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * \sa class CwiseBinaryOp, min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> cwiseMin(const Scalar &other) const { return cwiseMin(Derived::Constant(rows(), cols(), other)); @@ -100,10 +100,10 @@ cwiseMin(const Scalar &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const 
OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise max of *this and scalar \a other @@ -111,7 +111,7 @@ cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * \sa class CwiseBinaryOp, min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> cwiseMax(const Scalar &other) const { return cwiseMax(Derived::Constant(rows(), cols(), other)); @@ -133,7 +133,7 @@ cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } -typedef CwiseBinaryOp, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType; +typedef CwiseBinaryOp, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType; /** \returns an expression of the coefficient-wise == operator of \c *this and a scalar \a s * @@ -148,5 +148,5 @@ EIGEN_DEVICE_FUNC inline const CwiseScalarEqualReturnType cwiseEqual(const Scalar& s) const { - return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op()); + return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op()); } From 5fdd7036293aef88e5bdfe30acc74206486d82af Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 10 Jun 2016 15:58:04 +0200 Subject: [PATCH 36/86] Enable mixing types in numext::pow --- Eigen/src/Core/MathFunctions.h | 35 +++++++++-------------- Eigen/src/Core/functors/BinaryFunctors.h | 3 +- Eigen/src/Core/util/ForwardDeclarations.h | 1 + 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index ece04b754..2a05ae12d 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -494,24 +494,26 @@ struct log1p_retval * Implementation of pow * 
****************************************************************************/ -template -struct pow_default_impl +template::IsInteger&&NumTraits::IsInteger> +struct pow_impl { - typedef Scalar retval; - static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) + //typedef Scalar retval; + typedef typename ScalarBinaryOpTraits >::ReturnType result_type; + static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) { EIGEN_USING_STD_MATH(pow); return pow(x, y); } }; -template -struct pow_default_impl +template +struct pow_impl { - static EIGEN_DEVICE_FUNC inline Scalar run(Scalar x, Scalar y) + typedef ScalarX result_type; + static EIGEN_DEVICE_FUNC inline ScalarX run(const ScalarX &x, const ScalarY &y) { - Scalar res(1); - eigen_assert(!NumTraits::IsSigned || y >= 0); + ScalarX res(1); + eigen_assert(!NumTraits::IsSigned || y >= 0); if(y & 1) res *= x; y >>= 1; while(y) @@ -524,15 +526,6 @@ struct pow_default_impl } }; -template -struct pow_impl : pow_default_impl::IsInteger> {}; - -template -struct pow_retval -{ - typedef Scalar type; -}; - /**************************************************************************** * Implementation of random * ****************************************************************************/ @@ -928,11 +921,11 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -template +template EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) +inline typename internal::pow_impl::result_type pow(const ScalarX& x, const ScalarY& y) { - return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); + return internal::pow_impl::run(x, y); } template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index c59147db4..ec06499a0 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h 
+++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -251,9 +251,10 @@ struct functor_traits > { * \brief Template functor to compute the pow of two scalars */ template struct scalar_binary_pow_op { + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) EIGEN_DEVICE_FUNC - inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } + inline result_type operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } }; template struct functor_traits > { diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 340d1f3a5..760d6bcf6 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -217,6 +217,7 @@ template struct scalar_product_ template struct scalar_quotient_op; template struct scalar_multiple2_op; template struct scalar_quotient2_op; +template struct scalar_binary_pow_op; } // end namespace internal From 5de8d7036b8b48bc4986d96f677c77eb466ce02b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 10 Jun 2016 15:58:22 +0200 Subject: [PATCH 37/86] Add real.pow(complex), complex.pow(real) unit tests. 
--- test/mixingtypes.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index 2af188b25..d719a3c53 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -196,6 +196,9 @@ template void mixingtypes(int size = SizeAtCompileType) // VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast().eval().array() ); + VERIFY_IS_APPROX( md.array().pow(mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( mcd.array().pow(md.array()), mcd.array().pow(md.template cast().eval().array()) ); + rcd = mcd; VERIFY_IS_APPROX( rcd = md, md.template cast().eval() ); rcd = mcd; From fabae6c9a180669f6afbd732ee6973311a36476c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 10 Jun 2016 15:58:33 +0200 Subject: [PATCH 38/86] Cleanup --- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 12 ++---------- Eigen/src/plugins/CommonCwiseBinaryOps.h | 14 -------------- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index da47a7ed7..7f7d0dbf4 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -81,16 +81,8 @@ max * Example: \include Cwise_array_power_array.cpp * Output: \verbinclude Cwise_array_power_array.out */ -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -const CwiseBinaryOp, const Derived, const ExponentDerived> -pow(const ArrayBase& exponents) const -{ - return CwiseBinaryOp, const Derived, const ExponentDerived>( - this->derived(), - exponents.derived() - ); -} +EIGEN_MAKE_CWISE_BINARY_OP(pow,binary_pow) + // TODO code generating macros could be moved to Macros.h and could include generation of documentation #define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \ diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h 
b/Eigen/src/plugins/CommonCwiseBinaryOps.h index 6a5ccc1aa..f16be7bea 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -17,13 +17,6 @@ * \sa class CwiseBinaryOp, operator-=() */ EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference) -// template -// EIGEN_DEVICE_FUNC -// EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference) -// operator-(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const -// { -// return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference)(derived(), other.derived()); -// } /** \returns an expression of the sum of \c *this and \a other * @@ -32,13 +25,6 @@ EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference) * \sa class CwiseBinaryOp, operator+=() */ EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum) -// template -// EIGEN_DEVICE_FUNC -// EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum) -// operator+(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const -// { -// return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum)(derived(), other.derived()); -// } /** \returns an expression of a custom coefficient-wise operator \a func of *this and \a other * From a05607875a57129821f81e6aae1727357f4db5e6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 10 Jun 2016 11:53:56 -0700 Subject: [PATCH 39/86] Don't refer to the half2 type unless it's been defined --- unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 0f3778e6e..fdb5ee6b8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -52,7 +52,7 @@ struct PacketType : internal::packet_traits { }; // For CUDA packet types when using a GpuDevice -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && 
defined(EIGEN_HAS_CUDA_FP16) template <> struct PacketType { typedef half2 type; From 83904a21c11ffdb88f3ad8a65ded7bf46c1a068a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 11 Jun 2016 14:41:36 +0200 Subject: [PATCH 40/86] Make sure T(i+1,i)==0 when diagonalizing T(i:i+1,i:i+1) --- Eigen/src/Eigenvalues/RealQZ.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index c4715b954..b3a910dd9 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -630,11 +630,11 @@ namespace Eigen { internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right); // Apply resulting Jacobi rotations - m_T.applyOnTheLeft(i,i+1,j_left); - m_T.applyOnTheRight(i,i+1,j_right); m_S.applyOnTheLeft(i,i+1,j_left); m_S.applyOnTheRight(i,i+1,j_right); - m_T(i,i+1) = Scalar(0); + m_T.applyOnTheLeft(i,i+1,j_left); + m_T.applyOnTheRight(i,i+1,j_right); + m_T(i+1,i) = m_T(i,i+1) = Scalar(0); if(m_computeQZ) { m_Q.applyOnTheRight(i,i+1,j_left.transpose()); From a3a4714abac02ba48a683c3c3967cebee2833188 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 11 Jun 2016 14:41:53 +0200 Subject: [PATCH 41/86] Add debug output. 
--- test/real_qz.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/real_qz.cpp b/test/real_qz.cpp index a1766c6d9..45ae8d763 100644 --- a/test/real_qz.cpp +++ b/test/real_qz.cpp @@ -49,11 +49,20 @@ template void real_qz(const MatrixType& m) for (Index i=0; i void mixingtypes(int size = SizeAtCompileType) // check scalar products VERIFY_IS_APPROX(vcf * sf , vcf * complex(sf)); - VERIFY_IS_APPROX(sd * vcd, complex(sd) * vcd); + VERIFY_IS_APPROX(sd * vcd , complex(sd) * vcd); VERIFY_IS_APPROX(vf * scf , vf.template cast >() * scf); - VERIFY_IS_APPROX(scd * vd, scd * vd.template cast >()); + VERIFY_IS_APPROX(scd * vd , scd * vd.template cast >()); + + // check scalar quotients + VERIFY_IS_APPROX(vcf / sf , vcf / complex(sf)); + VERIFY_IS_APPROX(vf / scf , vf.template cast >() / scf); // check dot product vf.dot(vf); From 7a9ef7bbb4d71e9ea5150a244238e62d350a8896 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 13 Jun 2016 16:17:23 +0200 Subject: [PATCH 44/86] Add default template parameters for the second scalar type of binary functors. This enhences backward compatibility. --- Eigen/src/Core/util/ForwardDeclarations.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 760d6bcf6..af3dce21b 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -176,11 +176,11 @@ namespace internal { // with optional conjugation of the arguments. 
template struct conj_helper; -template struct scalar_sum_op; -template struct scalar_difference_op; -template struct scalar_conj_product_op; -template struct scalar_min_op; -template struct scalar_max_op; +template struct scalar_sum_op; +template struct scalar_difference_op; +template struct scalar_conj_product_op; +template struct scalar_min_op; +template struct scalar_max_op; template struct scalar_opposite_op; template struct scalar_conjugate_op; template struct scalar_real_op; From 3c12e241643ebea97706c9ed7412e9a7c1bf16d0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 13 Jun 2016 16:18:59 +0200 Subject: [PATCH 45/86] Add bind1st_op and bind2nd_op helpers to turn binary functors into unary ones, and implement scalar_multiple2 and scalar_quotient2 on top of them. --- Eigen/src/Core/functors/BinaryFunctors.h | 140 +++++++++++++----- Eigen/src/Core/util/ForwardDeclarations.h | 2 - Eigen/src/plugins/CommonCwiseUnaryOps.h | 23 ++- .../Eigen/src/AutoDiff/AutoDiffScalar.h | 8 +- 4 files changed, 115 insertions(+), 58 deletions(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index ec06499a0..637514a20 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -16,12 +16,21 @@ namespace internal { //---------- associative binary functors ---------- +template +struct binary_op_base +{ + typedef Arg1 first_argument_type; + typedef Arg2 second_argument_type; +}; + /** \internal * \brief Template functor to compute the sum of two scalars * * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum() */ -template struct scalar_sum_op { +template +struct scalar_sum_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; } @@ -57,7 +66,9 @@ template<> 
struct scalar_sum_op : scalar_sum_op { * * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() */ -template struct scalar_product_op { +template +struct scalar_product_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } @@ -82,7 +93,9 @@ struct functor_traits > { * * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y) */ -template struct scalar_conj_product_op { +template +struct scalar_conj_product_op : binary_op_base +{ enum { Conj = NumTraits::IsComplex @@ -111,7 +124,9 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() */ -template struct scalar_min_op { +template +struct scalar_min_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); } @@ -135,7 +150,9 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() */ -template struct scalar_max_op { +template +struct scalar_max_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); } @@ -174,37 +191,51 @@ struct result_of(LhsScalar,RhsScalar)> }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool 
operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;} @@ -216,7 +247,9 @@ template struct scalar_cmp_op struct scalar_hypot_op { +template +struct scalar_hypot_op : binary_op_base +{ EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) // typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const @@ 
-250,7 +283,9 @@ struct functor_traits > { /** \internal * \brief Template functor to compute the pow of two scalars */ -template struct scalar_binary_pow_op { +template +struct scalar_binary_pow_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) EIGEN_DEVICE_FUNC @@ -270,7 +305,9 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::operator- */ -template struct scalar_difference_op { +template +struct scalar_difference_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; } @@ -291,7 +328,9 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, Cwise::operator/() */ -template struct scalar_quotient_op { +template +struct scalar_quotient_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } @@ -363,7 +402,8 @@ template<> struct functor_traits { * * \sa class CwiseBinaryOp, Cwise::igamma */ -template struct scalar_igamma_op { +template struct scalar_igamma_op : binary_op_base +{ EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { using numext::igamma; return igamma(a, x); @@ -388,7 +428,8 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, Cwise::igammac */ -template struct scalar_igammac_op { +template struct scalar_igammac_op : binary_op_base +{ EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { using numext::igammac; return igammac(a, x); @@ -411,6 +452,47 @@ 
struct functor_traits > { //---------- binary functors bound to a constant, thus appearing as a unary functor ---------- +// The following two classes permits to turn any binary functor into a unary one with one argument bound to a constant value. +// They are analogues to std::binder1st/binder2nd but with the following differences: +// - they are compatible with packetOp +// - they are portable across C++ versions (the std::binder* are deprecated in C++11) +template struct bind1st_op : BinaryOp { + + typedef typename BinaryOp::first_argument_type first_argument_type; + typedef typename BinaryOp::second_argument_type second_argument_type; + typedef typename BinaryOp::result_type result_type; + + bind1st_op(const first_argument_type &val) : m_value(val) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const + { return BinaryOp::packetOp(internal::pset1(m_value), b); } + + first_argument_type m_value; +}; +template struct functor_traits > : functor_traits {}; + + +template struct bind2nd_op : BinaryOp { + + typedef typename BinaryOp::first_argument_type first_argument_type; + typedef typename BinaryOp::second_argument_type second_argument_type; + typedef typename BinaryOp::result_type result_type; + + bind2nd_op(const second_argument_type &val) : m_value(val) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return BinaryOp::packetOp(a,internal::pset1(m_value)); } + + second_argument_type m_value; +}; +template struct functor_traits > : functor_traits {}; + /** \internal * \brief Template functor to multiply a scalar by a fixed other one * @@ -442,17 +524,6 @@ template 
struct functor_traits > { enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; -template -struct scalar_multiple2_op { - typedef typename ScalarBinaryOpTraits::ReturnType result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; /** \internal * \brief Template functor to divide a scalar by a fixed other one @@ -477,17 +548,6 @@ template struct functor_traits > { enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; -template -struct scalar_quotient2_op { - typedef typename ScalarBinaryOpTraits::ReturnType result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; /** \internal diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index af3dce21b..84d505d4a 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -215,8 +215,6 @@ template struct scalar_betainc_op; template struct scalar_hypot_op; template struct scalar_product_op; template struct scalar_quotient_op; -template struct scalar_multiple2_op; -template struct 
scalar_quotient2_op; template struct scalar_binary_pow_op; } // end namespace internal diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h index 927167aff..80dc46cd4 100644 --- a/Eigen/src/plugins/CommonCwiseUnaryOps.h +++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h @@ -14,7 +14,6 @@ /** \internal Represents a scalar multiple of an expression */ typedef CwiseUnaryOp, const Derived> ScalarMultipleReturnType; -typedef CwiseUnaryOp >, const Derived> ScalarComplexMultipleReturnType; /** \internal Represents a quotient of an expression by a scalar*/ typedef CwiseUnaryOp, const Derived> ScalarQuotient1ReturnType; @@ -73,15 +72,15 @@ operator/(const Scalar& scalar) const /** Overloaded for efficiently multipling with compatible scalar types */ template EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, - const CwiseUnaryOp, const Derived> >::type +typename internal::enable_if::Defined, + const CwiseUnaryOp >, const Derived> >::type operator*(const T& scalar) const { #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #endif - return CwiseUnaryOp, const Derived>( - derived(), internal::scalar_multiple2_op(scalar) ); + typedef internal::bind2nd_op > op; + return CwiseUnaryOp(derived(), op(scalar) ); } EIGEN_DEVICE_FUNC @@ -91,28 +90,28 @@ operator*(const Scalar& scalar, const StorageBaseType& matrix) template EIGEN_DEVICE_FUNC inline friend -typename internal::enable_if::Defined, - const CwiseUnaryOp, const Derived> >::type +typename internal::enable_if::Defined, + const CwiseUnaryOp >, const Derived> >::type operator*(const T& scalar, const StorageBaseType& matrix) { #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #endif - return CwiseUnaryOp, const Derived>( - matrix.derived(), internal::scalar_multiple2_op(scalar) ); + typedef internal::bind1st_op > op; + return CwiseUnaryOp(matrix.derived(), op(scalar) ); } template EIGEN_DEVICE_FUNC inline typename 
internal::enable_if::Defined, - const CwiseUnaryOp, const Derived> >::type + const CwiseUnaryOp >, const Derived> >::type operator/(const T& scalar) const { #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #endif - return CwiseUnaryOp, const Derived>( - derived(), internal::scalar_quotient2_op(scalar) ); + typedef internal::bind2nd_op > op; + return CwiseUnaryOp(derived(), op(scalar) ); } template struct CastXpr { typedef typename internal::cast_return_type, const Derived> >::type Type; }; diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index ba61288a3..0ed91fdb7 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -426,18 +426,18 @@ struct auto_diff_special_op<_DerType, true> } - inline const AutoDiffScalar, DerType>::Type > + inline const AutoDiffScalar >, DerType>::Type > operator*(const Real& other) const { - return AutoDiffScalar, DerType>::Type >( + return AutoDiffScalar >, DerType>::Type >( derived().value() * other, derived().derivatives() * other); } - friend inline const AutoDiffScalar, DerType>::Type > + friend inline const AutoDiffScalar >, DerType>::Type > operator*(const Real& other, const AutoDiffScalar<_DerType>& a) { - return AutoDiffScalar, DerType>::Type >( + return AutoDiffScalar >, DerType>::Type >( a.value() * other, a.derivatives() * other); } From 39781dc1e230a2ece36b484dbef76f66d2136f86 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:03:26 +0200 Subject: [PATCH 46/86] Fix compilation of evaluator unit test --- test/evaluators.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/evaluators.cpp b/test/evaluators.cpp index 876dffe22..aed5a05a7 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -21,7 +21,7 @@ namespace Eigen { EIGEN_STRONG_INLINE DstXprType& copy_using_evaluator(const EigenBase &dst, const 
SrcXprType &src) { - call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); return dst.const_cast_derived(); } @@ -29,7 +29,7 @@ namespace Eigen { EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const NoAlias& dst, const SrcXprType &src) { - call_assignment(dst, src.derived(), internal::assign_op()); + call_assignment(dst, src.derived(), internal::assign_op()); return dst.expression(); } @@ -45,7 +45,7 @@ namespace Eigen { dst.const_cast_derived().resizeLike(src.derived()); #endif - call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); return dst.const_cast_derived(); } @@ -53,28 +53,28 @@ namespace Eigen { void add_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(const_cast(dst), src.derived(), internal::add_assign_op()); + call_assignment(const_cast(dst), src.derived(), internal::add_assign_op()); } template void subtract_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(const_cast(dst), src.derived(), internal::sub_assign_op()); + call_assignment(const_cast(dst), src.derived(), internal::sub_assign_op()); } template void multiply_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op()); } template void divide_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op()); + 
call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op()); } template From 64fcfd314f594bbc358aa8e2b8dc8def9b288b66 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:26:57 +0200 Subject: [PATCH 47/86] Implement scalar multiples and division by a scalar as a binary-expression with a constant expression. This slightly complexifies the type of the expressions and implies that we now have to distinguish between scalar*expr and expr*scalar to catch scalar-multiple expression (e.g., see BlasUtil.h), but this brings several advantages: - it makes it clear on each side the scalar is applied, - it clearly reflects that we are dealing with a binary-expression, - the complexity of the type is hidden through macros defined at the end of Macros.h, - distinguishing between "scalar op expr" and "expr op scalar" is important to support non commutative fields (like quaternions) - "scalar op expr" is now fully equivalent to "ConstantExpr(scalar) op expr" - scalar_multiple_op, scalar_quotient1_op and scalar_quotient2_op are not used anymore in officially supported modules (still used in Tensor) --- Eigen/src/Core/DiagonalMatrix.h | 9 ++- Eigen/src/Core/ProductEvaluators.h | 37 +++++++----- Eigen/src/Core/SelfAdjointView.h | 2 +- Eigen/src/Core/util/BlasUtil.h | 24 ++++++++ Eigen/src/Core/util/ForwardDeclarations.h | 2 - Eigen/src/Core/util/Macros.h | 45 ++++++++++++++ Eigen/src/Core/util/XprHelper.h | 14 +++++ Eigen/src/Geometry/AlignedBox.h | 8 +-- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 1 + Eigen/src/plugins/CommonCwiseBinaryOps.h | 30 ++++++++++ Eigen/src/plugins/CommonCwiseUnaryOps.h | 71 ----------------------- test/array.cpp | 2 +- 12 files changed, 146 insertions(+), 99 deletions(-) diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index aa619dd5c..d6f89bced 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -71,18 +71,17 @@ class DiagonalBase : public EigenBase 
return InverseReturnType(diagonal().cwiseInverse()); } - typedef DiagonalWrapper, const DiagonalVectorType> > ScalarMultipleReturnType; EIGEN_DEVICE_FUNC - inline const ScalarMultipleReturnType + inline const DiagonalWrapper operator*(const Scalar& scalar) const { - return ScalarMultipleReturnType(diagonal() * scalar); + return DiagonalWrapper(diagonal() * scalar); } EIGEN_DEVICE_FUNC - friend inline const ScalarMultipleReturnType + friend inline const DiagonalWrapper operator*(const Scalar& scalar, const DiagonalBase& other) { - return ScalarMultipleReturnType(other.diagonal() * scalar); + return DiagonalWrapper(scalar * other.diagonal()); } }; diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 71ae6e54c..77549e709 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -35,22 +35,28 @@ struct evaluator > EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} }; -// Catch scalar * ( A * B ) and transform it to (A*scalar) * B +// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" // TODO we should apply that rule only if that's really helpful -template -struct evaluator_assume_aliasing, const Product > > +template +struct evaluator_assume_aliasing, + const CwiseNullaryOp, Plain1>, + const Product > > { static const bool value = true; }; -template -struct evaluator, const Product > > - : public evaluator,const Lhs>, Rhs, DefaultProduct> > +template +struct evaluator, + const CwiseNullaryOp, Plain1>, + const Product > > + : public evaluator > { - typedef CwiseUnaryOp, const Product > XprType; - typedef evaluator,const Lhs>, Rhs, DefaultProduct> > Base; - + typedef CwiseBinaryOp, + const CwiseNullaryOp, Plain1>, + const Product > XprType; + typedef evaluator > Base; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) - : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) + : Base(xpr.lhs().functor().m_other * 
xpr.rhs().lhs() * xpr.rhs().rhs()) {} }; @@ -171,16 +177,17 @@ struct Assignment, internal::sub_assign_op< // Dense ?= scalar * Product // TODO we should apply that rule if that's really helpful // for instance, this is not good for inner products -template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> -struct Assignment, +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain> +struct Assignment, const CwiseNullaryOp,Plain>, const Product >, AssignFunc, Dense2Dense, Scalar> { - typedef CwiseUnaryOp, - const Product > SrcXprType; + typedef CwiseBinaryOp, + const CwiseNullaryOp,Plain>, + const Product > SrcXprType; static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) { - call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func); + call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func); } }; diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 92c541f08..62d4180da 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -129,7 +129,7 @@ template class SelfAdjointView } friend EIGEN_DEVICE_FUNC - const SelfAdjointView,MatrixType>,UpLo> + const SelfAdjointView operator*(const Scalar& s, const SelfAdjointView& mat) { return (s*mat.nestedExpression()).template selfadjointView(); diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index c163f1458..7e8f90d88 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -293,6 +293,30 @@ struct blas_traits, NestedXpr> > }; // pop scalar multiple +template +struct blas_traits, const CwiseNullaryOp,Plain>, NestedXpr> > + : blas_traits +{ + typedef blas_traits Base; + typedef CwiseBinaryOp, const CwiseNullaryOp,Plain>, 
NestedXpr> XprType; + typedef typename Base::ExtractType ExtractType; + static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); } + static inline Scalar extractScalarFactor(const XprType& x) + { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); } +}; +template +struct blas_traits, NestedXpr, const CwiseNullaryOp,Plain> > > + : blas_traits +{ + typedef blas_traits Base; + typedef CwiseBinaryOp, NestedXpr, const CwiseNullaryOp,Plain> > XprType; + typedef typename Base::ExtractType ExtractType; + static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); } + static inline Scalar extractScalarFactor(const XprType& x) + { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; } +}; + +// pop scalar multiple (using deprecated scalar_multiple_op) template struct blas_traits, NestedXpr> > : blas_traits diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 84d505d4a..8334446d6 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -201,8 +201,6 @@ template struct scalar_inverse_op; template struct scalar_square_op; template struct scalar_cube_op; template struct scalar_cast_op; -template struct scalar_multiple_op; -template struct scalar_quotient1_op; template struct scalar_random_op; template struct scalar_add_op; template struct scalar_constant_op; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index a51572463..f3c6512c9 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -895,6 +895,51 @@ namespace Eigen { return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \ } +#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \ + CwiseBinaryOp::Scalar,SCALAR>, const EXPR, \ + const typename internal::plain_constant_type::type> + +#define 
EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \ + CwiseBinaryOp::Scalar>, \ + const typename internal::plain_constant_type::type, const EXPR> + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \ + EIGEN_DEVICE_FUNC inline \ + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,OPNAME) \ + (METHOD)(const Scalar& scalar) const { \ + return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,OPNAME)(derived(), \ + typename internal::plain_constant_type::type(derived().rows(), derived().cols(), scalar)); \ + } \ + \ + template EIGEN_DEVICE_FUNC inline \ + typename internal::enable_if::Defined, \ + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME) >::type \ + (METHOD)(const T& scalar) const { \ + return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME)(derived(), \ + typename internal::plain_constant_type::type(derived().rows(), derived().cols(), scalar)); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + EIGEN_DEVICE_FUNC inline friend \ + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,OPNAME) \ + (METHOD)(const Scalar& scalar, const StorageBaseType& matrix) { \ + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,OPNAME)( \ + typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), scalar), matrix.derived()); \ + } \ + \ + template EIGEN_DEVICE_FUNC inline friend \ + typename internal::enable_if::Defined, \ + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME) >::type \ + (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME)( \ + typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), scalar), matrix.derived()); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) + + #ifdef 
EIGEN_EXCEPTIONS # define EIGEN_THROW_X(X) throw X # define EIGEN_THROW throw diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 828813161..c41c408b0 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -576,6 +576,20 @@ struct plain_diag_type >::type type; }; +template +struct plain_constant_type +{ + enum { Options = (traits::Flags&RowMajorBit)?RowMajor:0 }; + + typedef Array::RowsAtCompileTime, traits::ColsAtCompileTime, + Options, traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> array_type; + + typedef Matrix::RowsAtCompileTime, traits::ColsAtCompileTime, + Options, traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> matrix_type; + + typedef CwiseNullaryOp, const typename conditional::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type; +}; + template struct is_lvalue { diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index aeb043a6c..d20d17492 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -36,8 +36,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) typedef NumTraits ScalarTraits; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef typename ScalarTraits::Real RealScalar; - typedef typename ScalarTraits::NonInteger NonInteger; + typedef typename ScalarTraits::NonInteger NonInteger; typedef Matrix VectorType; + typedef CwiseBinaryOp, const VectorType, const VectorType> VectorTypeSum; /** Define constants to name the corners of a 1D, 2D or 3D axis aligned bounding box */ enum CornerType @@ -111,10 +112,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) inline VectorType& (max)() { return m_max; } /** \returns the center of the box */ - inline const CwiseUnaryOp, - const CwiseBinaryOp, const VectorType, const VectorType> > + inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient) 
center() const - { return (m_min+m_max)/2; } + { return (m_min+m_max)/RealScalar(2); } /** \returns the lengths of the sides of the bounding box. * Note that this function does not get the same diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 7f7d0dbf4..351762e82 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -1,3 +1,4 @@ + /** \returns an expression of the coefficient wise product of \c *this and \a other * * \sa MatrixBase::cwiseProduct diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index f16be7bea..c5eaea16d 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -45,3 +45,33 @@ binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other, const Cu return CwiseBinaryOp(derived(), other.derived(), func); } + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product); +#else +/** \returns an expression of \c *this scaled by the scalar factor \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template +const CwiseBinaryOp,Derived,Constant > operator*(const T& scalar) const; +/** \returns an expression of \c *this scaled by the scalar factor \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator*(const T& scalar, const StorageBaseType& expr); +#endif + + + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient); +#else +/** \returns an expression of \c *this divided by the scalar value \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
+ */ +template +const CwiseBinaryOp,Derived,Constant > operator/(const T& scalar) const; +#endif diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h index 80dc46cd4..6cd5479a0 100644 --- a/Eigen/src/plugins/CommonCwiseUnaryOps.h +++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h @@ -12,11 +12,6 @@ #ifndef EIGEN_PARSED_BY_DOXYGEN -/** \internal Represents a scalar multiple of an expression */ -typedef CwiseUnaryOp, const Derived> ScalarMultipleReturnType; - -/** \internal Represents a quotient of an expression by a scalar*/ -typedef CwiseUnaryOp, const Derived> ScalarQuotient1ReturnType; /** \internal the return type of conjugate() */ typedef typename internal::conditional::IsComplex, const CwiseUnaryOp, const Derived>, @@ -38,7 +33,6 @@ typedef CwiseUnaryOp, const Derived> ImagReturn typedef CwiseUnaryView, Derived> NonConstImagReturnType; typedef CwiseUnaryOp, const Derived> NegativeReturnType; -//typedef CwiseUnaryOp, const Derived> #endif // not EIGEN_PARSED_BY_DOXYGEN @@ -49,71 +43,6 @@ inline const NegativeReturnType operator-() const { return NegativeReturnType(derived()); } -/** \returns an expression of \c *this scaled by the scalar factor \a scalar */ -EIGEN_DEVICE_FUNC -inline const ScalarMultipleReturnType -operator*(const Scalar& scalar) const -{ - return ScalarMultipleReturnType(derived(), internal::scalar_multiple_op(scalar)); -} - -#ifdef EIGEN_PARSED_BY_DOXYGEN -const ScalarMultipleReturnType operator*(const RealScalar& scalar) const; -#endif - -/** \returns an expression of \c *this divided by the scalar value \a scalar */ -EIGEN_DEVICE_FUNC -inline const ScalarQuotient1ReturnType -operator/(const Scalar& scalar) const -{ - return ScalarQuotient1ReturnType(derived(), internal::scalar_quotient1_op(scalar)); -} - -/** Overloaded for efficiently multipling with compatible scalar types */ -template -EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, - const CwiseUnaryOp >, const Derived> >::type 
-operator*(const T& scalar) const -{ -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - typedef internal::bind2nd_op > op; - return CwiseUnaryOp(derived(), op(scalar) ); -} - -EIGEN_DEVICE_FUNC -inline friend const ScalarMultipleReturnType -operator*(const Scalar& scalar, const StorageBaseType& matrix) -{ return matrix*scalar; } - -template -EIGEN_DEVICE_FUNC inline friend -typename internal::enable_if::Defined, - const CwiseUnaryOp >, const Derived> >::type -operator*(const T& scalar, const StorageBaseType& matrix) -{ -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - typedef internal::bind1st_op > op; - return CwiseUnaryOp(matrix.derived(), op(scalar) ); -} - -template -EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, - const CwiseUnaryOp >, const Derived> >::type -operator/(const T& scalar) const -{ -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - typedef internal::bind2nd_op > op; - return CwiseUnaryOp(derived(), op(scalar) ); -} - template struct CastXpr { typedef typename internal::cast_return_type, const Derived> >::type Type; }; /** \returns an expression of *this with the \a Scalar type casted to diff --git a/test/array.cpp b/test/array.cpp index bd470d5f7..0416ec5d2 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -807,7 +807,7 @@ void test_array() VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, int >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, float >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); - typedef CwiseUnaryOp, ArrayXd > Xpr; + typedef CwiseUnaryOp, ArrayXd > Xpr; VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); From deb8306e60c53f052d1df16dce38a4b6c6c98aab Mon Sep 17 00:00:00 2001 From: Gael 
Guennebaud Date: Tue, 14 Jun 2016 11:28:03 +0200 Subject: [PATCH 48/86] Move MatrixBase::operator*(UniformScaling) as a free function in Scaling.h, and fix return type. --- Eigen/src/Core/MatrixBase.h | 4 +--- Eigen/src/Geometry/Scaling.h | 11 +++++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index f63505fef..d9d2426ad 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -403,7 +403,6 @@ template class MatrixBase inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; - inline ScalarMultipleReturnType operator*(const UniformScaling& s) const; // put this as separate enum value to work around possible GCC 4.3 bug (?) enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) : ColsAtCompileTime==1 ? Vertical : Horizontal }; @@ -416,8 +415,7 @@ template class MatrixBase typedef Block::ColsAtCompileTime==1 ? SizeMinusOne : 1, internal::traits::ColsAtCompileTime==1 ? 
1 : SizeMinusOne> ConstStartMinusOne; - typedef CwiseUnaryOp::Scalar>, - const ConstStartMinusOne > HNormalizedReturnType; + typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType; inline const HNormalizedReturnType hnormalized() const; diff --git a/Eigen/src/Geometry/Scaling.h b/Eigen/src/Geometry/Scaling.h index 643138199..3e12681b0 100644 --- a/Eigen/src/Geometry/Scaling.h +++ b/Eigen/src/Geometry/Scaling.h @@ -107,12 +107,15 @@ public: /** \addtogroup Geometry_Module */ //@{ -/** Concatenates a linear transformation matrix and a uniform scaling */ +/** Concatenates a linear transformation matrix and a uniform scaling + * \relates UniformScaling + */ // NOTE this operator is defiend in MatrixBase and not as a friend function // of UniformScaling to fix an internal crash of Intel's ICC -template typename MatrixBase::ScalarMultipleReturnType -MatrixBase::operator*(const UniformScaling& s) const -{ return derived() * s.factor(); } +template +EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,product) +operator*(const MatrixBase& matrix, const UniformScaling& s) +{ return matrix.derived() * s.factor(); } /** Constructs a uniform scaling from scale factor \a s */ static inline UniformScaling Scaling(float s) { return UniformScaling(s); } From f5b1c7394537cc74978617383628c3b6b9399e57 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:29:06 +0200 Subject: [PATCH 49/86] Set cost of constant expression to 0 (the cost should be amortized through the expression) --- Eigen/src/Core/functors/NullaryFunctors.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 78cc22277..eaa582f23 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -26,7 +26,8 @@ struct scalar_constant_op { }; template struct functor_traits > -{ enum { Cost = 1, PacketAccess 
= packet_traits::Vectorizable, IsRepeatable = true }; }; +{ enum { Cost = 0 /* as the constant value should be loaded in register only once for the whole expression */, + PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; template struct scalar_identity_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) From f57fd78e308c80d25b1f8c92ae8db2bdc5045a29 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:29:54 +0200 Subject: [PATCH 50/86] Generalize coeff-wise sparse products to support different scalar types --- Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 27 ++++++++++++---------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index dd21eb8c5..aad7b7d79 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -28,6 +28,9 @@ namespace Eigen { // generic sparse // 4 - dense op dense product dense // generic dense +// +// TODO to ease compiler job, we could specialize product/quotient with a scalar +// and fallback to cwise-unary evaluator using bind1st_op and bind2nd_op. 
template class CwiseBinaryOpImpl @@ -323,12 +326,12 @@ protected: }; // "sparse .* sparse" -template -struct binary_evaluator, Lhs, Rhs>, IteratorBased, IteratorBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IteratorBased, IteratorBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef typename evaluator::InnerIterator LhsIterator; typedef typename evaluator::InnerIterator RhsIterator; typedef CwiseBinaryOp XprType; @@ -407,12 +410,12 @@ protected: }; // "dense .* sparse" -template -struct binary_evaluator, Lhs, Rhs>, IndexBased, IteratorBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IndexBased, IteratorBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef evaluator LhsEvaluator; typedef typename evaluator::InnerIterator RhsIterator; typedef CwiseBinaryOp XprType; @@ -480,12 +483,12 @@ protected: }; // "sparse .* dense" -template -struct binary_evaluator, Lhs, Rhs>, IteratorBased, IndexBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IteratorBased, IndexBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef typename evaluator::InnerIterator LhsIterator; typedef evaluator RhsEvaluator; typedef CwiseBinaryOp XprType; From bcc0f38f981f8efadeba314fb41e58f911c60ccc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:31:27 +0200 Subject: [PATCH 51/86] Add unittesting plugins to scalar_product_op and scalar_quotient_op to help checking that types are properly propagated. 
--- Eigen/src/Core/functors/BinaryFunctors.h | 12 ++++++++++++ test/linearstructure.cpp | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 637514a20..77e9e6e93 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -70,7 +70,13 @@ template struct scalar_product_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) +#else + scalar_product_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -332,7 +338,13 @@ template struct scalar_quotient_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) +#else + scalar_quotient_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp index e7f4b3dc5..7eef976d2 100644 --- a/test/linearstructure.cpp +++ b/test/linearstructure.cpp @@ -9,7 +9,7 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
static bool g_called; -#define EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN { g_called = true; } +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } #include "main.h" From 12350d3ac7a1192407b0f920bd937d4f753ec118 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:31:52 +0200 Subject: [PATCH 52/86] Add unit test for AlignedBox::center --- test/geo_alignedbox.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp index 2bdb4b7f2..ba3378aab 100644 --- a/test/geo_alignedbox.cpp +++ b/test/geo_alignedbox.cpp @@ -48,6 +48,8 @@ template void alignedbox(const BoxType& _box) b0.extend(p0); b0.extend(p1); VERIFY(b0.contains(p0*s1+(Scalar(1)-s1)*p1)); + VERIFY(b0.contains(b0.center())); + VERIFY(b0.center()==(p0+p1)/Scalar(2)); (b2 = b0).extend(b1); VERIFY(b2.contains(b0)); From f925dba3d981829fce7b383bec4d52fe41a023a4 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:32:09 +0200 Subject: [PATCH 53/86] Fix compilation of BVH example --- unsupported/doc/examples/BVH_Example.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/unsupported/doc/examples/BVH_Example.cpp b/unsupported/doc/examples/BVH_Example.cpp index 6b6fac075..afb0c94c2 100644 --- a/unsupported/doc/examples/BVH_Example.cpp +++ b/unsupported/doc/examples/BVH_Example.cpp @@ -6,9 +6,7 @@ using namespace Eigen; typedef AlignedBox Box2d; namespace Eigen { - namespace internal { - Box2d bounding_box(const Vector2d &v) { return Box2d(v, v); } //compute the bounding box of a single point - } + Box2d bounding_box(const Vector2d &v) { return Box2d(v, v); } //compute the bounding box of a single point } struct PointPointMinimizer //how to compute squared distances between points and rectangles From 756ac4a93dea57bf61079c4867e712a8c26d77f6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 12:03:39 +0200 Subject: [PATCH 54/86] Fix doc. 
--- Eigen/src/plugins/CommonCwiseBinaryOps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index c5eaea16d..03c4aac94 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -55,7 +55,7 @@ EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product); */ template const CwiseBinaryOp,Derived,Constant > operator*(const T& scalar) const; -/** \returns an expression of \c *this scaled by the scalar factor \a scalar +/** \returns an expression of \a expr scaled by the scalar factor \a scalar * * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. */ From a8c08e8b8e8bdd486a5a27b9f1e92c48ef4361cd Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 12:06:10 +0200 Subject: [PATCH 55/86] Implement expr+scalar, scalar+expr, expr-scalar, and scalar-expr as binary expressions, and generalize supported scalar types. The following functors are now deprecated: scalar_add_op, scalar_sub_op, and scalar_rsub_op. 
--- Eigen/src/Core/functors/BinaryFunctors.h | 31 ++++++------ Eigen/src/Core/util/ForwardDeclarations.h | 1 - Eigen/src/plugins/ArrayCwiseBinaryOps.h | 57 ++++++++++++----------- test/linearstructure.cpp | 16 +++++++ test/mixingtypes.cpp | 12 +++++ 5 files changed, 70 insertions(+), 47 deletions(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 77e9e6e93..fd7b5f8b4 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -32,7 +32,13 @@ template struct scalar_sum_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) +#else + scalar_sum_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -315,7 +321,13 @@ template struct scalar_difference_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) +#else + scalar_difference_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -584,7 +596,7 @@ struct functor_traits > /** \internal * \brief Template functor to subtract a fixed scalar to another one - * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_rsub_op + * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op */ template struct scalar_sub_op { @@ -600,23 +612,6 @@ template struct functor_traits > { enum { Cost = NumTraits::AddCost, 
PacketAccess = packet_traits::HasAdd }; }; -/** \internal - * \brief Template functor to subtract a scalar to fixed another one - * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_sub_op - */ -template -struct scalar_rsub_op { - EIGEN_DEVICE_FUNC inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_rsub_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other - a; } - template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::psub(pset1(m_other), a); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; /** \internal * \brief Template functor to raise a scalar to a power diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 8334446d6..c1295dc5f 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -202,7 +202,6 @@ template struct scalar_square_op; template struct scalar_cube_op; template struct scalar_cast_op; template struct scalar_random_op; -template struct scalar_add_op; template struct scalar_constant_op; template struct scalar_identity_op; template struct scalar_sign_op; diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 351762e82..8bf4e9b18 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -192,48 +192,49 @@ EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ) #undef EIGEN_MAKE_CWISE_COMP_R_OP // scalar addition - +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum); +#else /** \returns an expression of \c *this with each coeff incremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. 
It must be compatible with the scalar type of the given expression. * * Example: \include Cwise_plus.cpp * Output: \verbinclude Cwise_plus.out * * \sa operator+=(), operator-() */ -EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp, const Derived> -operator+(const Scalar& scalar) const -{ - return CwiseUnaryOp, const Derived>(derived(), internal::scalar_add_op(scalar)); -} - -EIGEN_DEVICE_FUNC -friend inline const CwiseUnaryOp, const Derived> -operator+(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS& other) -{ - return other + scalar; -} +template +const CwiseBinaryOp,Derived,Constant > operator+(const T& scalar) const; +/** \returns an expression of \a expr with each coeff incremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator+(const T& scalar, const StorageBaseType& expr); +#endif +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference); +#else /** \returns an expression of \c *this with each coeff decremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
* * Example: \include Cwise_minus.cpp * Output: \verbinclude Cwise_minus.out * - * \sa operator+(), operator-=() + * \sa operator+=(), operator-() */ -EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp, const Derived> -operator-(const Scalar& scalar) const -{ - return CwiseUnaryOp, const Derived>(derived(), internal::scalar_sub_op(scalar));; -} - -EIGEN_DEVICE_FUNC -friend inline const CwiseUnaryOp, const Derived> -operator-(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS& other) -{ - return CwiseUnaryOp, const Derived>(other.derived(), internal::scalar_rsub_op(scalar));; -} +template +const CwiseBinaryOp,Derived,Constant > operator-(const T& scalar) const; +/** \returns an expression of the constant matrix of value \a scalar decremented by the coefficients of \a expr + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator-(const T& scalar, const StorageBaseType& expr); +#endif /** \returns an expression of the coefficient-wise && operator of *this and \a other * diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp index 7eef976d2..17474af10 100644 --- a/test/linearstructure.cpp +++ b/test/linearstructure.cpp @@ -93,6 +93,22 @@ template void real_complex(DenseIndex rows = MatrixType::Ro g_called = false; VERIFY_IS_APPROX(m1/s, m1/Scalar(s)); VERIFY(g_called && "matrix / real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s+m1.array(), Scalar(s)+m1.array()); + VERIFY(g_called && "real + matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()+s, m1.array()+Scalar(s)); + VERIFY(g_called && "matrix + real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s-m1.array(), Scalar(s)-m1.array()); + VERIFY(g_called && "real - matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()-s, m1.array()-Scalar(s)); + VERIFY(g_called && "matrix 
- real not properly optimized"); } void test_linearstructure() diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index dee4f35df..cf2207114 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -75,6 +75,18 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX(vcf / sf , vcf / complex(sf)); VERIFY_IS_APPROX(vf / scf , vf.template cast >() / scf); + // check scalar increment + VERIFY_IS_APPROX(vcf.array() + sf , vcf.array() + complex(sf)); + VERIFY_IS_APPROX(sd + vcd.array(), complex(sd) + vcd.array()); + VERIFY_IS_APPROX(vf.array() + scf, vf.template cast >().array() + scf); + VERIFY_IS_APPROX(scd + vd.array() , scd + vd.template cast >().array()); + + // check scalar subtractions + VERIFY_IS_APPROX(vcf.array() - sf , vcf.array() - complex(sf)); + VERIFY_IS_APPROX(sd - vcd.array(), complex(sd) - vcd.array()); + VERIFY_IS_APPROX(vf.array() - scf, vf.template cast >().array() - scf); + VERIFY_IS_APPROX(scd - vd.array() , scd - vd.template cast >().array()); + // check dot product vf.dot(vf); #if 0 // we get other compilation errors here than just static asserts From a9bb653a684b55fca1c9f58089f94871484ae50c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 12:07:00 +0200 Subject: [PATCH 56/86] Update doc (scalar_add_op is now deprecated) --- doc/CustomizingEigen.dox | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/CustomizingEigen.dox b/doc/CustomizingEigen.dox index cb25f4ec9..607f86658 100644 --- a/doc/CustomizingEigen.dox +++ b/doc/CustomizingEigen.dox @@ -56,13 +56,13 @@ void makeFloor(const MatrixBase& other) { derived() = derived().cw template void makeCeil(const MatrixBase& other) { derived() = derived().cwiseMax(other.derived()); } -const CwiseUnaryOp, Derived> +const CwiseBinaryOp, const Derived, const ConstantReturnType> operator+(const Scalar& scalar) const -{ return CwiseUnaryOp, Derived>(derived(), internal::scalar_add_op(scalar)); } +{ return CwiseBinaryOp, const 
Derived, const ConstantReturnType>(derived(), Constant(rows(),cols(),scalar)); } -friend const CwiseUnaryOp, Derived> +friend const CwiseBinaryOp, const ConstantReturnType, Derived> operator+(const Scalar& scalar, const MatrixBase& mat) -{ return CwiseUnaryOp, Derived>(mat.derived(), internal::scalar_add_op(scalar)); } +{ return CwiseBinaryOp, const ConstantReturnType, Derived>(Constant(rows(),cols(),scalar), mat.derived()); } \endcode Then one can the following declaration in the config.h or whatever prerequisites header file of his project: From 396d9cfb6eaa62ecafc6f2d02851f0d2f8d6b4ef Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 14:10:07 +0200 Subject: [PATCH 57/86] Generalize expr.pow(scalar), pow(expr,scalar) and pow(scalar,expr). Internal: scalar_pow_op (unary) is removed, and scalar_binary_pow_op is renamed scalar_pow_op. --- Eigen/src/Core/GlobalFunctions.h | 60 ++++++++++++++++++----- Eigen/src/Core/MathFunctions.h | 2 +- Eigen/src/Core/functors/BinaryFunctors.h | 39 ++++++--------- Eigen/src/Core/util/ForwardDeclarations.h | 3 +- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 21 +++++++- Eigen/src/plugins/ArrayCwiseUnaryOps.h | 19 ------- test/mixingtypes.cpp | 47 ++++++++++++------ 7 files changed, 118 insertions(+), 73 deletions(-) diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index e489cefec..60e2ccfed 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -89,14 +89,32 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign) /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent. + * + * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar). 
* * \sa ArrayBase::pow() + * + * \relates ArrayBase */ +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Derived,Constant > + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) + && ScalarBinaryOpTraits::Defined, + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent) { + return x.derived().pow(exponent); + } + template - inline const Eigen::CwiseUnaryOp, const Derived> + inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow) pow(const Eigen::ArrayBase& x, const typename Derived::Scalar& exponent) { return x.derived().pow(exponent); } +#endif /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents. * @@ -106,12 +124,14 @@ namespace Eigen * Output: \verbinclude Cwise_array_power_array.out * * \sa ArrayBase::pow() + * + * \relates ArrayBase */ template - inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> + inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) { - return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( x.derived(), exponents.derived() ); @@ -120,23 +140,39 @@ namespace Eigen /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. * * This function computes the coefficient-wise power between a scalar and an array of exponents. - * Beaware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). 
* * Example: \include Cwise_scalar_power_array.cpp * Output: \verbinclude Cwise_scalar_power_array.out * * \sa ArrayBase::pow() + * + * \relates ArrayBase */ - template - inline const Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived> - pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Constant,Derived> + pow(const Scalar& x,const Eigen::ArrayBase& x); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) + && ScalarBinaryOpTraits::Defined, + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type + pow(const Scalar& x, const Eigen::ArrayBase& exponents) { - typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x); - return Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived>( - constant_x, - exponents.derived() - ); + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); } + + template + inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow) + pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) + { + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); + } +#endif /** * \brief Component-wise division of a scalar by array elements. 
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 2a05ae12d..b683effab 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -498,7 +498,7 @@ template: struct pow_impl { //typedef Scalar retval; - typedef typename ScalarBinaryOpTraits >::ReturnType result_type; + typedef typename ScalarBinaryOpTraits >::ReturnType result_type; static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) { EIGEN_USING_STD_MATH(pow); diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index fd7b5f8b4..9e40303be 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -295,16 +295,24 @@ struct functor_traits > { /** \internal * \brief Template functor to compute the pow of two scalars */ -template -struct scalar_binary_pow_op : binary_op_base +template +struct scalar_pow_op : binary_op_base { - typedef typename ScalarBinaryOpTraits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN + EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op) +#else + scalar_pow_op() { + typedef Scalar LhsScalar; + typedef Exponent RhsScalar; + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC - inline result_type operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } + inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; @@ -613,23 +621,6 @@ struct functor_traits > { enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; -/** \internal - * \brief Template functor to raise a scalar to a power - * \sa class CwiseUnaryOp, Cwise::pow - */ -template -struct scalar_pow_op { - 
// FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } - EIGEN_DEVICE_FUNC inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} - EIGEN_DEVICE_FUNC - inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } - const Scalar m_exponent; -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; - /** \internal * \brief Template functor to compute the quotient between a scalar and array entries. * \sa class CwiseUnaryOp, Cwise::inverse() diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index c1295dc5f..830f20f90 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -196,7 +196,6 @@ template struct scalar_sin_op; template struct scalar_acos_op; template struct scalar_asin_op; template struct scalar_tan_op; -template struct scalar_pow_op; template struct scalar_inverse_op; template struct scalar_square_op; template struct scalar_cube_op; @@ -209,10 +208,10 @@ template struct scalar_igamma_op; template struct scalar_igammac_op; template struct scalar_betainc_op; +template struct scalar_pow_op; template struct scalar_hypot_op; template struct scalar_product_op; template struct scalar_quotient_op; -template struct scalar_binary_pow_op; } // end namespace internal diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 8bf4e9b18..1e20e35d7 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -82,7 +82,26 @@ max * Example: \include Cwise_array_power_array.cpp * Output: \verbinclude Cwise_array_power_array.out */ -EIGEN_MAKE_CWISE_BINARY_OP(pow,binary_pow) +EIGEN_MAKE_CWISE_BINARY_OP(pow,pow) + +#ifndef EIGEN_PARSED_BY_DOXYGEN 
+EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow); +#else +/** \returns an expression of the coefficients of \c *this raised to the constant power \a exponent + * + * \tparam T is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression. + * + * This function computes the coefficient-wise power. The function MatrixBase::pow() in the + * unsupported module MatrixFunctions computes the matrix power. + * + * Example: \include Cwise_pow.cpp + * Output: \verbinclude Cwise_pow.out + * + * \sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log() + */ +template +const CwiseBinaryOp,Derived,Constant > pow(const T& exponent) const; +#endif // TODO code generating macros could be moved to Macros.h and could include generation of documentation diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 775fa6ee0..4a6361d8a 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -26,7 +26,6 @@ typedef CwiseUnaryOp, const Derived> LgammaRe typedef CwiseUnaryOp, const Derived> DigammaReturnType; typedef CwiseUnaryOp, const Derived> ErfReturnType; typedef CwiseUnaryOp, const Derived> ErfcReturnType; -typedef CwiseUnaryOp, const Derived> PowReturnType; typedef CwiseUnaryOp, const Derived> SquareReturnType; typedef CwiseUnaryOp, const Derived> CubeReturnType; typedef CwiseUnaryOp, const Derived> RoundReturnType; @@ -388,24 +387,6 @@ erfc() const return ErfcReturnType(derived()); } -/** \returns an expression of the coefficient-wise power of *this to the given exponent. - * - * This function computes the coefficient-wise power. The function MatrixBase::pow() in the - * unsupported module MatrixFunctions computes the matrix power.
- * - * Example: \include Cwise_pow.cpp - * Output: \verbinclude Cwise_pow.out - * - * \sa exp(), log() - */ -EIGEN_DEVICE_FUNC -inline const PowReturnType -pow(const Scalar& exponent) const -{ - return PowReturnType(derived(), internal::scalar_pow_op(exponent)); -} - - /** \returns an expression of the coefficient-wise inverse of *this. * * Example: \include Cwise_inverse.cpp diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index cf2207114..b38271a17 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -23,10 +23,18 @@ #endif +static bool g_called; +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } + #include "main.h" using namespace std; +#define VERIFY_MIX_SCALAR(XPR,REF) \ + g_called = false; \ + VERIFY_IS_APPROX(XPR,REF); \ + VERIFY( g_called && #XPR" not properly optimized"); + template void mixingtypes(int size = SizeAtCompileType) { typedef std::complex CF; @@ -66,26 +74,34 @@ template void mixingtypes(int size = SizeAtCompileType) #endif // check scalar products - VERIFY_IS_APPROX(vcf * sf , vcf * complex(sf)); - VERIFY_IS_APPROX(sd * vcd , complex(sd) * vcd); - VERIFY_IS_APPROX(vf * scf , vf.template cast >() * scf); - VERIFY_IS_APPROX(scd * vd , scd * vd.template cast >()); + VERIFY_MIX_SCALAR(vcf * sf , vcf * complex(sf)); + VERIFY_MIX_SCALAR(sd * vcd , complex(sd) * vcd); + VERIFY_MIX_SCALAR(vf * scf , vf.template cast >() * scf); + VERIFY_MIX_SCALAR(scd * vd , scd * vd.template cast >()); // check scalar quotients - VERIFY_IS_APPROX(vcf / sf , vcf / complex(sf)); - VERIFY_IS_APPROX(vf / scf , vf.template cast >() / scf); + VERIFY_MIX_SCALAR(vcf / sf , vcf / complex(sf)); + VERIFY_MIX_SCALAR(vf / scf , vf.template cast >() / scf); // check scalar increment - VERIFY_IS_APPROX(vcf.array() + sf , vcf.array() + complex(sf)); - VERIFY_IS_APPROX(sd + vcd.array(), complex(sd) + vcd.array()); - VERIFY_IS_APPROX(vf.array() + scf, vf.template cast >().array() + scf); - VERIFY_IS_APPROX(scd + vd.array() , scd + 
vd.template cast >().array()); + VERIFY_MIX_SCALAR(vcf.array() + sf , vcf.array() + complex(sf)); + VERIFY_MIX_SCALAR(sd + vcd.array(), complex(sd) + vcd.array()); + VERIFY_MIX_SCALAR(vf.array() + scf, vf.template cast >().array() + scf); + VERIFY_MIX_SCALAR(scd + vd.array() , scd + vd.template cast >().array()); // check scalar subtractions - VERIFY_IS_APPROX(vcf.array() - sf , vcf.array() - complex(sf)); - VERIFY_IS_APPROX(sd - vcd.array(), complex(sd) - vcd.array()); - VERIFY_IS_APPROX(vf.array() - scf, vf.template cast >().array() - scf); - VERIFY_IS_APPROX(scd - vd.array() , scd - vd.template cast >().array()); + VERIFY_MIX_SCALAR(vcf.array() - sf , vcf.array() - complex(sf)); + VERIFY_MIX_SCALAR(sd - vcd.array(), complex(sd) - vcd.array()); + VERIFY_MIX_SCALAR(vf.array() - scf, vf.template cast >().array() - scf); + VERIFY_MIX_SCALAR(scd - vd.array() , scd - vd.template cast >().array()); + + // check scalar powers + VERIFY_MIX_SCALAR( pow(vcf.array(), sf), pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( vcf.array().pow(sf) , pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( pow(sd, vcd.array()), pow(complex(sd), vcd.array()) ); + VERIFY_MIX_SCALAR( pow(vf.array(), scf), pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( vf.array().pow(scf) , pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( pow(scd, vd.array()), pow(scd, vd.template cast >().array()) ); // check dot product vf.dot(vf); @@ -215,6 +231,9 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX( md.array().pow(mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); VERIFY_IS_APPROX( mcd.array().pow(md.array()), mcd.array().pow(md.template cast().eval().array()) ); + VERIFY_IS_APPROX( pow(md.array(),mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( pow(mcd.array(),md.array()), mcd.array().pow(md.template cast().eval().array()) ); + rcd = mcd; VERIFY_IS_APPROX( rcd = md, md.template 
cast().eval() ); rcd = mcd; From 5d3820373513c950fbd238b0d8fd73bf3cd3e7fb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 15:06:03 +0200 Subject: [PATCH 58/86] Update Tensor module to use bind1st_op and bind2nd_op --- .../Eigen/CXX11/src/Tensor/TensorBase.h | 74 +++++++++---------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 8f3580ba7..aeaa9dea9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -204,64 +204,62 @@ class TensorBase } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> pow(Scalar exponent) const { - return unaryExpr(internal::scalar_pow_op(exponent)); + return unaryExpr(internal::bind2nd_op >(exponent)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator+ (Scalar rhs) const { - return unaryExpr(internal::scalar_add_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, const Derived> + const TensorCwiseUnaryOp >, const Derived> operator+ (Scalar lhs, const Derived& rhs) { - return rhs + lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator- (Scalar rhs) const { EIGEN_STATIC_ASSERT((NumTraits::IsSigned || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return unaryExpr(internal::scalar_sub_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, - const TensorCwiseUnaryOp, const Derived> > + const TensorCwiseUnaryOp >, 
const Derived> operator- (Scalar lhs, const Derived& rhs) { - return -rhs + lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator* (Scalar rhs) const { - return unaryExpr(internal::scalar_multiple_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, const Derived> + const TensorCwiseUnaryOp >, const Derived> operator* (Scalar lhs, const Derived& rhs) { - return rhs * lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator/ (Scalar rhs) const { - return unaryExpr(internal::scalar_quotient1_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, - const TensorCwiseUnaryOp, const Derived> > + const TensorCwiseUnaryOp >, const Derived> operator/ (Scalar lhs, const Derived& rhs) { - return rhs.inverse() * lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC @@ -371,66 +369,66 @@ class TensorBase // Comparisons and tests. 
template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator==(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator!=(const OtherDerived& other) const { - return 
binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } // comparisons and tests for Scalars EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator<(Scalar threshold) const { return operator<(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator<=(Scalar threshold) const { return operator<=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator>(Scalar threshold) const { return operator>(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator>=(Scalar threshold) const { return operator>=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator==(Scalar threshold) const { return operator==(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator!=(Scalar threshold) 
const { return operator!=(constant(threshold)); } From 62134082aa0dd33cb7328f9f9a86491d21a52444 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 15:06:35 +0200 Subject: [PATCH 59/86] Update AutoDiffScalar wrt to scalar-multiple. --- Eigen/src/Core/util/Macros.h | 2 + .../Eigen/src/AutoDiff/AutoDiffScalar.h | 89 +++++++++---------- 2 files changed, 43 insertions(+), 48 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index f3c6512c9..031e0892e 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -462,6 +462,8 @@ #define EIGEN_CAT2(a,b) a ## b #define EIGEN_CAT(a,b) EIGEN_CAT2(a,b) +#define EIGEN_COMMA , + // convert a token to a string #define EIGEN_MAKESTRING2(a) #a #define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 0ed91fdb7..feaeeaf5a 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -30,6 +30,13 @@ template struct auto_diff_special_op; } // end namespace internal +template class AutoDiffScalar; + +template +inline AutoDiffScalar MakeAutoDiffScalar(const typename NewDerType::Scalar& value, const NewDerType &der) { + return AutoDiffScalar(value,der); +} + /** \class AutoDiffScalar * \brief A scalar type replacement with automatic differentation capability * @@ -257,20 +264,16 @@ class AutoDiffScalar -m_derivatives); } - inline const AutoDiffScalar, const DerType> > + inline const AutoDiffScalar operator*(const Scalar& other) const { - return AutoDiffScalar, const DerType> >( - m_value * other, - (m_derivatives * other)); + return MakeAutoDiffScalar(m_value * other, m_derivatives * other); } - friend inline const AutoDiffScalar, const DerType> > + friend inline const AutoDiffScalar operator*(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, const DerType> >( - a.value() * other, 
- a.derivatives() * other); + return MakeAutoDiffScalar(a.value() * other, a.derivatives() * other); } // inline const AutoDiffScalar, DerType>::Type > @@ -289,20 +292,16 @@ class AutoDiffScalar // a.derivatives() * other); // } - inline const AutoDiffScalar, const DerType> > + inline const AutoDiffScalar operator/(const Scalar& other) const { - return AutoDiffScalar, const DerType> >( - m_value / other, - (m_derivatives * (Scalar(1)/other))); + return MakeAutoDiffScalar(m_value / other, (m_derivatives * (Scalar(1)/other))); } - friend inline const AutoDiffScalar, const DerType> > + friend inline const AutoDiffScalar operator/(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, const DerType> >( - other / a.value(), - a.derivatives() * (Scalar(-other) / (a.value()*a.value()))); + return MakeAutoDiffScalar(other / a.value(), a.derivatives() * (Scalar(-other) / (a.value()*a.value()))); } // inline const AutoDiffScalar, DerType>::Type > @@ -322,34 +321,29 @@ class AutoDiffScalar // } template - inline const AutoDiffScalar, - const CwiseBinaryOp, - const CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type > > > > + inline const AutoDiffScalar EIGEN_COMMA + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) EIGEN_COMMA + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,Scalar,product) >,Scalar,product) > operator/(const AutoDiffScalar& other) const { internal::make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - const CwiseBinaryOp, - const CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type > > > >( + return MakeAutoDiffScalar( m_value / other.value(), - ((m_derivatives * other.value()) - (m_value * other.derivatives())) + ((m_derivatives * other.value()) - (other.derivatives() * m_value)) * (Scalar(1)/(other.value()*other.value()))); } template inline const AutoDiffScalar, - const 
CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type> > > + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product), + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,Scalar,product) > > operator*(const AutoDiffScalar& other) const { internal::make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - const CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type > > >( + return MakeAutoDiffScalar( m_value * other.value(), - (m_derivatives * other.value()) + (m_value * other.derivatives())); + (m_derivatives * other.value()) + (other.derivatives() * m_value)); } inline AutoDiffScalar& operator*=(const Scalar& other) @@ -533,11 +527,11 @@ struct ScalarBinaryOpTraits > #define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \ template \ - inline const Eigen::AutoDiffScalar::type>::Scalar>, const typename Eigen::internal::remove_all::type> > \ + inline const Eigen::AutoDiffScalar< \ + EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename Eigen::internal::remove_all::type, typename Eigen::internal::traits::type>::Scalar, product) > \ FUNC(const Eigen::AutoDiffScalar& x) { \ using namespace Eigen; \ typedef typename Eigen::internal::traits::type>::Scalar Scalar; \ - typedef AutoDiffScalar, const typename Eigen::internal::remove_all::type> > ReturnType; \ CODE; \ } @@ -570,46 +564,45 @@ inline AutoDiffScalar::type::Plain EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs, using std::abs; - return ReturnType(abs(x.value()), x.derivatives() * (x.value()<0 ? -1 : 1) );) + return Eigen::MakeAutoDiffScalar(abs(x.value()), x.derivatives() * (x.value()<0 ? 
-1 : 1) );) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs2, using numext::abs2; - return ReturnType(abs2(x.value()), x.derivatives() * (Scalar(2)*x.value()));) + return Eigen::MakeAutoDiffScalar(abs2(x.value()), x.derivatives() * (Scalar(2)*x.value()));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sqrt, using std::sqrt; Scalar sqrtx = sqrt(x.value()); - return ReturnType(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));) + return Eigen::MakeAutoDiffScalar(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(cos, using std::cos; using std::sin; - return ReturnType(cos(x.value()), x.derivatives() * (-sin(x.value())));) + return Eigen::MakeAutoDiffScalar(cos(x.value()), x.derivatives() * (-sin(x.value())));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sin, using std::sin; using std::cos; - return ReturnType(sin(x.value()),x.derivatives() * cos(x.value()));) + return Eigen::MakeAutoDiffScalar(sin(x.value()),x.derivatives() * cos(x.value()));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(exp, using std::exp; Scalar expx = exp(x.value()); - return ReturnType(expx,x.derivatives() * expx);) + return Eigen::MakeAutoDiffScalar(expx,x.derivatives() * expx);) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(log, using std::log; - return ReturnType(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));) + return Eigen::MakeAutoDiffScalar(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));) template -inline const Eigen::AutoDiffScalar::type>::Scalar>, const typename internal::remove_all::type> > -pow(const Eigen::AutoDiffScalar& x, const typename internal::traits::type>::Scalar &y) +inline const Eigen::AutoDiffScalar< +EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,typename internal::traits::type>::Scalar,product) > +pow(const Eigen::AutoDiffScalar &x, const typename internal::traits::type>::Scalar &y) { using namespace Eigen; typedef typename internal::remove_all::type DerTypeCleaned; typedef typename Eigen::internal::traits::Scalar Scalar; - return 
AutoDiffScalar, const DerTypeCleaned> >( - std::pow(x.value(),y), - x.derivatives() * (y * std::pow(x.value(),y-1))); + return Eigen::MakeAutoDiffScalar(std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1))); } @@ -634,17 +627,17 @@ atan2(const AutoDiffScalar& a, const AutoDiffScalar& b) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(tan, using std::tan; using std::cos; - return ReturnType(tan(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cos(x.value()))));) + return Eigen::MakeAutoDiffScalar(tan(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cos(x.value()))));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(asin, using std::sqrt; using std::asin; - return ReturnType(asin(x.value()),x.derivatives() * (Scalar(1)/sqrt(1-numext::abs2(x.value()))));) + return Eigen::MakeAutoDiffScalar(asin(x.value()),x.derivatives() * (Scalar(1)/sqrt(1-numext::abs2(x.value()))));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(acos, using std::sqrt; using std::acos; - return ReturnType(acos(x.value()),x.derivatives() * (Scalar(-1)/sqrt(1-numext::abs2(x.value()))));) + return Eigen::MakeAutoDiffScalar(acos(x.value()),x.derivatives() * (Scalar(-1)/sqrt(1-numext::abs2(x.value()))));) #undef EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY From 70dad84b737374e97319733ebff6f36e86ac384d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 15:26:37 +0200 Subject: [PATCH 60/86] Generalize expr/expr and scalar/expr wrt scalar types. --- Eigen/src/Core/GlobalFunctions.h | 34 +++++++++++++++++++------ Eigen/src/plugins/ArrayCwiseBinaryOps.h | 4 +-- test/mixingtypes.cpp | 4 ++- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 60e2ccfed..5ffa6c694 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -175,18 +175,36 @@ namespace Eigen #endif /** - * \brief Component-wise division of a scalar by array elements. 
+ * \brief Component-wise division of the scalar \a s by array elements of \a a. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). + * + * \relates ArrayBase **/ - template - inline const Eigen::CwiseUnaryOp, const Derived> - operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase& a) +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Constant,Derived> + operator/(const Scalar& s,const Eigen::ArrayBase& a); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) + && ScalarBinaryOpTraits::Defined, + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,quotient) >::type + operator/(const Scalar& s, const Eigen::ArrayBase& a) { - return Eigen::CwiseUnaryOp, const Derived>( - a.derived(), - Eigen::internal::scalar_inverse_mult_op(s) - ); + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,quotient)( + typename internal::plain_constant_type::type(a.rows(), a.cols(), s), a.derived() ); } + template + inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,quotient) + operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase& a) + { + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,quotient)( + typename internal::plain_constant_type::type(a.rows(), a.cols(), s), a.derived() ); + } +#endif + /** \cpp11 \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays. * * This function computes the coefficient-wise incomplete gamma function. 
diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 1e20e35d7..0c1429c75 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -17,10 +17,10 @@ operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise min of \c *this and \a other diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index b38271a17..fe8c16470 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -82,6 +82,8 @@ template void mixingtypes(int size = SizeAtCompileType) // check scalar quotients VERIFY_MIX_SCALAR(vcf / sf , vcf / complex(sf)); VERIFY_MIX_SCALAR(vf / scf , vf.template cast >() / scf); + VERIFY_MIX_SCALAR(vf.array() / scf, vf.template cast >().array() / scf); + VERIFY_MIX_SCALAR(scd / vd.array() , scd / vd.template cast >().array()); // check scalar increment VERIFY_MIX_SCALAR(vcf.array() + sf , vcf.array() + complex(sf)); @@ -225,7 +227,7 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX( md.array() - mcd.array(), md.template cast().eval().array() - mcd.array() ); VERIFY_IS_APPROX( mcd.array() - md.array(), mcd.array() - md.template cast().eval().array() ); -// VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); + VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast().eval().array() ); VERIFY_IS_APPROX( 
md.array().pow(mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); From 1004c4df99a3e4a019f05b83badb06f4e2df5ee6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 15:27:28 +0200 Subject: [PATCH 61/86] Cleanup unused functors. --- Eigen/src/Core/functors/BinaryFunctors.h | 113 ----------------------- Eigen/src/Core/util/BlasUtil.h | 13 --- 2 files changed, 126 deletions(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 9e40303be..2c1331208 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -525,119 +525,6 @@ template struct bind2nd_op : BinaryOp { }; template struct functor_traits > : functor_traits {}; -/** \internal - * \brief Template functor to multiply a scalar by a fixed other one - * - * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ - */ -/* NOTE why doing the pset1() in packetOp *is* an optimization ? - * indeed it seems better to declare m_other as a Packet and do the pset1() once - * in the constructor. 
However, in practice: - * - GCC does not like m_other as a Packet and generate a load every time it needs it - * - on the other hand GCC is able to moves the pset1() outside the loop :) - * - simpler code ;) - * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) - */ -template -struct scalar_multiple_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - - -/** \internal - * \brief Template functor to divide a scalar by a fixed other one - * - * This functor is used to implement the quotient of a matrix by - * a scalar where the scalar type is not necessarily a floating point type. 
- * - * \sa class CwiseUnaryOp, MatrixBase::operator/ - */ -template -struct scalar_quotient1_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pdiv(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; - - - -/** \internal - * \brief Template functor to add a scalar to a fixed other one - * \sa class CwiseUnaryOp, Array::operator+ - */ -/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */ -template -struct scalar_add_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::padd(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to subtract a fixed scalar to another one - * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op - */ -template -struct scalar_sub_op { - EIGEN_DEVICE_FUNC inline scalar_sub_op(const scalar_sub_op& other) : 
m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_sub_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a - m_other; } - template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::psub(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - - -/** \internal - * \brief Template functor to compute the quotient between a scalar and array entries. - * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_mult_op { - EIGEN_DEVICE_FUNC scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; } - template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(m_other),a); } - Scalar m_other; -}; -template -struct functor_traits > -{ enum { PacketAccess = packet_traits::HasDiv, Cost = NumTraits::template Div::Cost }; }; - } // end namespace internal diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 7e8f90d88..a85ad558f 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -316,19 +316,6 @@ struct blas_traits, NestedXpr, const Cwi { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; } }; -// pop scalar multiple (using deprecated scalar_multiple_op) -template -struct blas_traits, NestedXpr> > - : blas_traits -{ - typedef blas_traits Base; - typedef CwiseUnaryOp, NestedXpr> XprType; - typedef typename Base::ExtractType ExtractType; - static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } - static inline Scalar extractScalarFactor(const XprType& x) - { return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); } -}; - // pop 
opposite template struct blas_traits, NestedXpr> > From c4d10e921f02409d771e960647beae0b6d26dc88 Mon Sep 17 00:00:00 2001 From: Igor Babuschkin Date: Tue, 14 Jun 2016 19:44:07 +0100 Subject: [PATCH 62/86] Implement exclusive scan option --- .../Eigen/CXX11/src/Tensor/TensorBase.h | 12 +++++----- .../Eigen/CXX11/src/Tensor/TensorScan.h | 22 ++++++++++++----- unsupported/test/cxx11_tensor_scan.cpp | 24 +++++++++++++++++++ 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 8f3580ba7..87fa672f4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -486,22 +486,22 @@ class TensorBase typedef TensorScanOp, const Derived> TensorScanSumOp; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorScanSumOp - cumsum(const Index& axis) const { - return TensorScanSumOp(derived(), axis); + cumsum(const Index& axis, bool exclusive = false) const { + return TensorScanSumOp(derived(), axis, exclusive); } typedef TensorScanOp, const Derived> TensorScanProdOp; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorScanProdOp - cumprod(const Index& axis) const { - return TensorScanProdOp(derived(), axis); + cumprod(const Index& axis, bool exclusive = false) const { + return TensorScanProdOp(derived(), axis, exclusive); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorScanOp - scan(const Index& axis, const Reducer& reducer) const { - return TensorScanOp(derived(), axis, reducer); + scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const { + return TensorScanOp(derived(), axis, exclusive, reducer); } // Reductions. 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 5207f6a8d..1aa196b84 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -57,8 +57,8 @@ public: typedef typename Eigen::internal::traits::Index Index; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp( - const XprType& expr, const Index& axis, const Op& op = Op()) - : m_expr(expr), m_axis(axis), m_accumulator(op) {} + const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op()) + : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index axis() const { return m_axis; } @@ -66,11 +66,14 @@ public: const XprType& expression() const { return m_expr; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op accumulator() const { return m_accumulator; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + bool exclusive() const { return m_exclusive; } protected: typename XprType::Nested m_expr; const Index m_axis; const Op m_accumulator; + const bool m_exclusive; }; // Eval as rvalue @@ -99,6 +102,7 @@ struct TensorEvaluator, Device> { : m_impl(op.expression(), device), m_device(device), m_axis(op.axis()), + m_exclusive(op.exclusive()), m_accumulator(op.accumulator()), m_dimensions(m_impl.dimensions()), m_size(m_dimensions[m_axis]), @@ -168,6 +172,7 @@ protected: TensorEvaluator m_impl; const Device& m_device; const Index m_axis; + const bool m_exclusive; Op m_accumulator; const Dimensions& m_dimensions; const Index& m_size; @@ -176,7 +181,7 @@ protected: // TODO(ibab) Parallelize this single-threaded implementation if desired EIGEN_DEVICE_FUNC void accumulateTo(Scalar* data) { - // We fix the index along the scan axis to 0 and perform an + // We fix the index along the scan axis to 0 and perform a // scan per remaining entry. 
The iteration is split into two nested // loops to avoid an integer division by keeping track of each idx1 and idx2. for (Index idx1 = 0; idx1 < dimensions().TotalSize() / m_size; idx1 += m_stride) { @@ -184,12 +189,17 @@ protected: // Calculate the starting offset for the scan Index offset = idx1 * m_size + idx2; - // Compute the prefix sum along the axis, starting at the calculated offset + // Compute the scan along the axis, starting at the calculated offset CoeffReturnType accum = m_accumulator.initialize(); for (Index idx3 = 0; idx3 < m_size; idx3++) { Index curr = offset + idx3 * m_stride; - m_accumulator.reduce(m_impl.coeff(curr), &accum); - data[curr] = m_accumulator.finalize(accum); + if (m_exclusive) { + data[curr] = m_accumulator.finalize(accum); + m_accumulator.reduce(m_impl.coeff(curr), &accum); + } else { + m_accumulator.reduce(m_impl.coeff(curr), &accum); + data[curr] = m_accumulator.finalize(accum); + } } } } diff --git a/unsupported/test/cxx11_tensor_scan.cpp b/unsupported/test/cxx11_tensor_scan.cpp index dbd3023d7..bafa6c96e 100644 --- a/unsupported/test/cxx11_tensor_scan.cpp +++ b/unsupported/test/cxx11_tensor_scan.cpp @@ -38,6 +38,30 @@ static void test_1d_scan() } } +template +static void test_1d_inclusive_scan() +{ + int size = 50; + Tensor tensor(size); + tensor.setRandom(); + Tensor result = tensor.cumsum(0, true); + + VERIFY_IS_EQUAL(tensor.dimension(0), result.dimension(0)); + + float accum = 0; + for (int i = 0; i < size; i++) { + VERIFY_IS_EQUAL(result(i), accum); + accum += tensor(i); + } + + accum = 1; + result = tensor.cumprod(0, true); + for (int i = 0; i < size; i++) { + VERIFY_IS_EQUAL(result(i), accum); + accum *= tensor(i); + } +} + template static void test_4d_scan() { From 101ea26f5e18919972b321b5f7e3ef4e07be3fd6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 00:01:16 +0200 Subject: [PATCH 63/86] Include the cost of stores in unrolling (also fix infinite unrolling with expression costing 0 like Constant) 
--- Eigen/src/Core/AssignEvaluator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index f966724cc..1df717bac 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -116,9 +116,9 @@ private: : 1, UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic - && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), + && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) + && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit) }; public: From 4e7c3af874e9e1273b5bf3acdc4b53e8c6bdf086 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 00:04:10 +0200 Subject: [PATCH 64/86] Cleanup useless helper: internal::product_result_scalar --- Eigen/src/Core/Product.h | 36 +----------------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index bad289761..ae0c94b38 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -16,40 +16,6 @@ template class Pro namespace internal { -// Determine the scalar of Product. This is normally the same as Lhs::Scalar times -// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor. -// TODO: this could be removed once ScalarBinaryOpTraits handles void. 
-template::Shape, - typename RhsShape = typename evaluator_traits::Shape > -struct product_result_scalar -{ - typedef typename ScalarBinaryOpTraits::ReturnType Scalar; -}; - -template -struct product_result_scalar -{ - typedef typename Rhs::Scalar Scalar; -}; - -template - struct product_result_scalar -{ - typedef typename Lhs::Scalar Scalar; -}; - -template -struct product_result_scalar -{ - typedef typename Rhs::Scalar Scalar; -}; - -template - struct product_result_scalar -{ - typedef typename Lhs::Scalar Scalar; -}; - template struct traits > { @@ -60,7 +26,7 @@ struct traits > typedef MatrixXpr XprKind; - typedef typename product_result_scalar::Scalar Scalar; + typedef typename ScalarBinaryOpTraits::Scalar, typename traits::Scalar>::ReturnType Scalar; typedef typename product_promote_storage_type::ret>::ret StorageKind; From aedc5be1d6b6b42f5037c4005b437f3e552ac101 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 14 Jun 2016 17:51:47 -0700 Subject: [PATCH 65/86] Avoid generating pseudo random numbers that are multiple of 5: this helps spread the load over multiple cpus without having to rely on work stealing. --- .../src/ThreadPool/NonBlockingThreadPool.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index c094563b7..30b292352 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -99,10 +99,12 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { typedef typename Environment::EnvThread Thread; struct PerThread { - bool inited; + PerThread() : pool(NULL), index(-1) { + rand = std::hash()(std::this_thread::get_id()); + } NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads. unsigned index; // Worker thread index in pool. 
- unsigned rand; // Random generator state. + uint64_t rand; // Random generator state. }; Environment env_; @@ -235,17 +237,18 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { return -1; } - PerThread* GetPerThread() { + static EIGEN_STRONG_INLINE PerThread* GetPerThread() { EIGEN_THREAD_LOCAL PerThread per_thread_; PerThread* pt = &per_thread_; - if (pt->inited) return pt; - pt->inited = true; - pt->rand = static_cast(std::hash()(std::this_thread::get_id())); return pt; } - static unsigned Rand(unsigned* state) { - return *state = *state * 1103515245 + 12345; + static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) { + uint64_t current = *state; + // Update the internal state + *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; + // Generate the random output (using the PCG-XSH-RS scheme) + return (current ^ (current >> 22)) >> (22 + (current >> 61)); } }; From c55035b9c0c894551850d122ac8b0cf1a053c28e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 09:57:33 +0200 Subject: [PATCH 66/86] Include the cost of stores in unrolling of triangular expressions. 
--- Eigen/src/Core/TriangularMatrix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 8731e9127..c599e0b32 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -794,7 +794,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr enum { unroll = DstXprType::SizeAtCompileTime != Dynamic && SrcEvaluatorType::CoeffReadCost < HugeCost - && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT + && DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT }; triangular_assignment_loop::run(kernel); From 4794834397aa0e2a6570a2f736b65b72b7db1aad Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 09:58:49 +0200 Subject: [PATCH 67/86] Propagate functor to ScalarBinaryOpTraits --- Eigen/src/Core/GlobalFunctions.h | 6 +++--- Eigen/src/Core/util/Macros.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 5ffa6c694..0361b40ad 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -103,7 +103,7 @@ namespace Eigen #else template inline typename internal::enable_if< !(internal::is_same::value) - && ScalarBinaryOpTraits::Defined, + && ScalarBinaryOpTraits >::Defined, const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent) { return x.derived().pow(exponent); @@ -157,7 +157,7 @@ namespace Eigen #else template inline typename internal::enable_if< !(internal::is_same::value) - && ScalarBinaryOpTraits::Defined, + && ScalarBinaryOpTraits >::Defined, const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type pow(const Scalar& x, const Eigen::ArrayBase& exponents) { @@ -188,7 +188,7 
@@ namespace Eigen #else template inline typename internal::enable_if< !(internal::is_same::value) - && ScalarBinaryOpTraits::Defined, + && ScalarBinaryOpTraits >::Defined, const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,quotient) >::type operator/(const Scalar& s, const Eigen::ArrayBase& a) { diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 031e0892e..87cc44657 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -914,7 +914,7 @@ namespace Eigen { } \ \ template EIGEN_DEVICE_FUNC inline \ - typename internal::enable_if::Defined, \ + typename internal::enable_if >::Defined, \ const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME) >::type \ (METHOD)(const T& scalar) const { \ return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME)(derived(), \ @@ -930,7 +930,7 @@ namespace Eigen { } \ \ template EIGEN_DEVICE_FUNC inline friend \ - typename internal::enable_if::Defined, \ + typename internal::enable_if >::Defined, \ const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME) >::type \ (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME)( \ From eb91345d641cc1aa7a62af1bf6db3e38a6d7225a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 15:22:03 +0200 Subject: [PATCH 68/86] Move scalar/expr to ArrayBase and fix documentation --- Eigen/src/Core/GlobalFunctions.h | 31 ------------------------ Eigen/src/plugins/ArrayCwiseBinaryOps.h | 24 ++++++++++++++---- Eigen/src/plugins/CommonCwiseBinaryOps.h | 6 ++--- 3 files changed, 22 insertions(+), 39 deletions(-) diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 0361b40ad..b9c3ec25b 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -173,37 +173,6 @@ namespace Eigen typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); } #endif 
- - /** - * \brief Component-wise division of the scalar \a s by array elements of \a a. - * - * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). - * - * \relates ArrayBase - **/ -#ifdef EIGEN_PARSED_BY_DOXYGEN - template - inline const CwiseBinaryOp,Constant,Derived> - operator/(const Scalar& s,const Eigen::ArrayBase& a); -#else - template - inline typename internal::enable_if< !(internal::is_same::value) - && ScalarBinaryOpTraits >::Defined, - const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,quotient) >::type - operator/(const Scalar& s, const Eigen::ArrayBase& a) - { - return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,quotient)( - typename internal::plain_constant_type::type(a.rows(), a.cols(), s), a.derived() ); - } - - template - inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,quotient) - operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase& a) - { - return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,quotient)( - typename internal::plain_constant_type::type(a.rows(), a.cols(), s), a.derived() ); - } -#endif /** \cpp11 \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays. 
* diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 0c1429c75..6cb609604 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -100,7 +100,7 @@ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow); * \sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log() */ template -const CwiseBinaryOp,Derived,Constant > pow(const T& exponent) const; +const CwiseBinaryOp,Derived,Constant > pow(const T& exponent) const; #endif @@ -224,13 +224,13 @@ EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum); * \sa operator+=(), operator-() */ template -const CwiseBinaryOp,Derived,Constant > operator+(const T& scalar) const; +const CwiseBinaryOp,Derived,Constant > operator+(const T& scalar) const; /** \returns an expression of \a expr with each coeff incremented by the constant \a scalar * * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. */ template friend -const CwiseBinaryOp,Constant,Derived> operator+(const T& scalar, const StorageBaseType& expr); +const CwiseBinaryOp,Constant,Derived> operator+(const T& scalar, const StorageBaseType& expr); #endif #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -246,13 +246,27 @@ EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference); * \sa operator+=(), operator-() */ template -const CwiseBinaryOp,Derived,Constant > operator-(const T& scalar) const; +const CwiseBinaryOp,Derived,Constant > operator-(const T& scalar) const; /** \returns an expression of the constant matrix of value \a scalar decremented by the coefficients of \a expr * * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
*/ template friend -const CwiseBinaryOp,Constant,Derived> operator-(const T& scalar, const StorageBaseType& expr); +const CwiseBinaryOp,Constant,Derived> operator-(const T& scalar, const StorageBaseType& expr); +#endif + + +#ifndef EIGEN_PARSED_BY_DOXYGEN + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(operator/,quotient) +#else + /** + * \brief Component-wise division of the scalar \a s by array elements of \a a. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). + */ + template friend + inline const CwiseBinaryOp,Constant,Derived> + operator/(const T& s,const StorageBaseType& a); #endif /** \returns an expression of the coefficient-wise && operator of *this and \a other diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index 03c4aac94..afac08a3f 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -54,13 +54,13 @@ EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product); * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. */ template -const CwiseBinaryOp,Derived,Constant > operator*(const T& scalar) const; +const CwiseBinaryOp,Derived,Constant > operator*(const T& scalar) const; /** \returns an expression of \a expr scaled by the scalar factor \a scalar * * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. */ template friend -const CwiseBinaryOp,Constant,Derived> operator*(const T& scalar, const StorageBaseType& expr); +const CwiseBinaryOp,Constant,Derived> operator*(const T& scalar, const StorageBaseType& expr); #endif @@ -73,5 +73,5 @@ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient); * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
*/ template -const CwiseBinaryOp,Derived,Constant > operator/(const T& scalar) const; +const CwiseBinaryOp,Derived,Constant > operator/(const T& scalar) const; #endif From 67c12531e567629e84713fbb3150560c916bd08c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 18:11:33 +0200 Subject: [PATCH 69/86] Fix warnings with gcc --- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 6 +++--- Eigen/src/plugins/CommonCwiseBinaryOps.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 6cb609604..19e25ab62 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -85,7 +85,7 @@ max EIGEN_MAKE_CWISE_BINARY_OP(pow,pow) #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow); +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow) #else /** \returns an expression of the coefficients of \c *this rasied to the constant power \a exponent * @@ -212,7 +212,7 @@ EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ) // scalar addition #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum); +EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum) #else /** \returns an expression of \c *this with each coeff incremented by the constant \a scalar * @@ -234,7 +234,7 @@ const CwiseBinaryOp,Constant,Derived> opera #endif #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference); +EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference) #else /** \returns an expression of \c *this with each coeff decremented by the constant \a scalar * diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index afac08a3f..b51ee9e4c 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -47,7 +47,7 @@ binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other, const Cu #ifndef EIGEN_PARSED_BY_DOXYGEN 
-EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product); +EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product) #else /** \returns an expression of \c *this scaled by the scalar factor \a scalar * @@ -66,7 +66,7 @@ const CwiseBinaryOp,Constant,Derived> o #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient); +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient) #else /** \returns an expression of \c *this divided by the scalar value \a scalar * From b055590e9135ffe762775ec919e490513b6974fa Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 16 Jun 2016 11:37:40 -0700 Subject: [PATCH 70/86] Made log1p_impl usable inside a GPU kernel --- Eigen/src/Core/MathFunctions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index ece04b754..342b47ced 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -462,7 +462,7 @@ struct arg_retval template::IsComplex > struct log1p_impl { - static inline Scalar run(const Scalar& x) + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) typedef typename NumTraits::Real RealScalar; @@ -472,7 +472,7 @@ struct log1p_impl } }; -#if EIGEN_HAS_CXX11_MATH +#if EIGEN_HAS_CXX11_MATH && !defined(__CUDACC__) template struct log1p_impl { static inline Scalar run(const Scalar& x) From de32f8d656c3ea7855ced77457ea661e43d417b7 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 20 Jun 2016 10:46:45 -0700 Subject: [PATCH 71/86] Fixed the printing of rank-0 tensors --- unsupported/Eigen/CXX11/src/Tensor/TensorIO.h | 68 +++++++++++++------ unsupported/test/cxx11_tensor_io.cpp | 16 +++++ 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h index 38a833f82..3db692ac6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -17,34 +17,58 @@ template<> struct significant_decimals_impl : significant_decimals_default_impl {}; -} -template -std::ostream& operator << (std::ostream& os, const TensorBase& expr) { - // Evaluate the expression if needed - TensorForcedEvalOp eval = expr.eval(); - TensorEvaluator, DefaultDevice> tensor(eval, DefaultDevice()); - tensor.evalSubExprsIfNeeded(NULL); - - typedef typename internal::remove_const::type Scalar; - typedef typename T::Index Index; - typedef typename TensorEvaluator, DefaultDevice>::Dimensions Dimensions; - const Index total_size = internal::array_prod(tensor.dimensions()); - - // Print the tensor as a 1d vector or a 2d matrix. - static const int rank = internal::array_size::value; - if (rank == 0) { - os << tensor.coeff(0); - } else if (rank == 1) { - Map > array(const_cast(tensor.data()), total_size); - os << array; - } else { +// Print the tensor as a 2d matrix +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); - static const int layout = TensorEvaluator, DefaultDevice>::Layout; + static const int layout = Tensor::Layout; Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); os << matrix; } +}; + + +// Print the tensor as a vector +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + Map > array(const_cast(tensor.data()), total_size); + os << array; + } +}; + + +// Print the tensor as a scalar +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& 
tensor) { + os << tensor.coeff(0); + } +}; +} + +template +std::ostream& operator << (std::ostream& os, const TensorBase& expr) { + typedef TensorEvaluator, DefaultDevice> Evaluator; + typedef typename Evaluator::Dimensions Dimensions; + + // Evaluate the expression if needed + TensorForcedEvalOp eval = expr.eval(); + Evaluator tensor(eval, DefaultDevice()); + tensor.evalSubExprsIfNeeded(NULL); + + // Print the result + static const int rank = internal::array_size::value; + internal::TensorPrinter::run(os, tensor); // Cleanup. tensor.cleanup(); diff --git a/unsupported/test/cxx11_tensor_io.cpp b/unsupported/test/cxx11_tensor_io.cpp index 8bbcf7089..8267dcadd 100644 --- a/unsupported/test/cxx11_tensor_io.cpp +++ b/unsupported/test/cxx11_tensor_io.cpp @@ -13,6 +13,20 @@ #include +template +static void test_output_0d() +{ + Tensor tensor; + tensor() = 123; + + std::stringstream os; + os << tensor; + + std::string expected("123"); + VERIFY_IS_EQUAL(std::string(os.str()), expected); +} + + template static void test_output_1d() { @@ -101,6 +115,8 @@ static void test_output_const() void test_cxx11_tensor_io() { + CALL_SUBTEST(test_output_0d()); + CALL_SUBTEST(test_output_0d()); CALL_SUBTEST(test_output_1d()); CALL_SUBTEST(test_output_1d()); CALL_SUBTEST(test_output_2d()); From c58df317473277b297dd018a2dc3d8fe85b00c92 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 21 Jun 2016 09:22:43 -0700 Subject: [PATCH 72/86] Handle empty tensors in the print functions --- unsupported/Eigen/CXX11/src/Tensor/TensorIO.h | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h index 3db692ac6..58ffaefab 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -26,10 +26,15 @@ struct TensorPrinter { typedef typename internal::remove_const::type Scalar; typedef typename Tensor::Index Index; const Index 
total_size = internal::array_prod(tensor.dimensions()); - const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); - static const int layout = Tensor::Layout; - Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); - os << matrix; + if (total_size == 0) { + os << "Empty tensor of rank " << Rank; + } + else { + const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); + static const int layout = Tensor::Layout; + Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); + os << matrix; + } } }; @@ -41,8 +46,13 @@ struct TensorPrinter { typedef typename internal::remove_const::type Scalar; typedef typename Tensor::Index Index; const Index total_size = internal::array_prod(tensor.dimensions()); - Map > array(const_cast(tensor.data()), total_size); - os << array; + if (total_size == 0) { + os << "Empty tensor of rank 1"; + } + else { + Map > array(const_cast(tensor.data()), total_size); + os << array; + } } }; From f8fcd6b32d6b1a3613330e553bb3bc52a0007192 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 22 Jun 2016 16:03:11 -0700 Subject: [PATCH 73/86] Turned the constructor of the PerThread struct into what is effectively a constant expression to make the code compatible with a wider range of compilers --- .../Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index 30b292352..8d4973ded 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -99,9 +99,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { typedef typename Environment::EnvThread Thread; struct PerThread { - PerThread() : pool(NULL), index(-1) { - rand = std::hash()(std::this_thread::get_id()); - } + PerThread() : 
pool(NULL), index(-1), rand(0) { } NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads. unsigned index; // Worker thread index in pool. uint64_t rand; // Random generator state. @@ -122,6 +120,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { PerThread* pt = GetPerThread(); pt->pool = this; pt->index = index; + pt->rand = std::hash()(std::this_thread::get_id()); Queue* q = queues_[index]; EventCount::Waiter* waiter = &waiters_[index]; for (;;) { From a29a2cb4ff4eb153d8e725f126f178371cc0468c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 22 Jun 2016 16:43:02 -0700 Subject: [PATCH 74/86] Silenced a couple of compilation warnings generated by xcode --- .../Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index 8d4973ded..c6db01a6b 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -99,7 +99,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { typedef typename Environment::EnvThread Thread; struct PerThread { - PerThread() : pool(NULL), index(-1), rand(0) { } + constexpr PerThread() : pool(NULL), index(-1), rand(0) { } NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads. unsigned index; // Worker thread index in pool. uint64_t rand; // Random generator state. 
@@ -247,7 +247,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { // Update the internal state *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; // Generate the random output (using the PCG-XSH-RS scheme) - return (current ^ (current >> 22)) >> (22 + (current >> 61)); + return static_cast((current ^ (current >> 22)) >> (22 + (current >> 61))); } }; From a3f7edf7e7672094190e04a0b4417de1abfa3de5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 23 Jun 2016 10:25:04 +0200 Subject: [PATCH 75/86] Biug 1242: fix comma init with empty matrices. --- Eigen/src/Core/CommaInitializer.h | 7 +++++-- test/commainitializer.cpp | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index 2abc6605c..38b1112ff 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -80,8 +80,11 @@ struct CommaInitializer EIGEN_DEVICE_FUNC CommaInitializer& operator,(const DenseBase& other) { - if(other.cols()==0 || other.rows()==0) + if(other.rows()==0) + { + m_col += other.cols(); return *this; + } if (m_col==m_xpr.cols()) { m_row+=m_currentBlockRows; @@ -90,7 +93,7 @@ struct CommaInitializer eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() && "Too many rows passed to comma initializer (operator<<)"); } - eigen_assert(m_col A1; + Matrix A2; + Matrix B; + B << A1, A2; + } + { + Matrix A1; + Matrix A2; + Matrix B; + B << A1, + A2; + } + } + } From 76faf4a9657efeed089aeedc98a769410c32d3d7 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 23 Jun 2016 14:27:20 +0200 Subject: [PATCH 76/86] Introduce a NumTraits::Literal type to be used for literals, and improve mixing type support in operations between arrays and scalars: - 2 * ArrayXcf is now optimized in the sense that the integer 2 is properly promoted to a float instead of a complex (fix a regression) - 2.1 * ArrayXi is now forbiden (previously, 2.1 
was converted to 2) - This mechanism should be applicable to any custom scalar type, assuming NumTraits::Literal is properly defined (it defaults to T) --- Eigen/src/Core/NumTraits.h | 11 ++++++++--- Eigen/src/Core/util/Macros.h | 30 ++++++++---------------------- Eigen/src/Core/util/XprHelper.h | 28 ++++++++++++++++++++++++++++ test/mixingtypes.cpp | 5 +++++ test/nesting_ops.cpp | 4 ++-- 5 files changed, 51 insertions(+), 27 deletions(-) diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index e065fa714..03f64a8e9 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -22,14 +22,16 @@ namespace Eigen { * This class stores enums, typedefs and static methods giving information about a numeric type. * * The provided data consists of: - * \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real, - * then \a Real is just a typedef to \a T. If \a T is \c std::complex then \a Real + * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real, + * then \c Real is just a typedef to \a T. If \a T is \c std::complex then \c Real * is a typedef to \a U. - * \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values, + * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values, * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is * only intended as a helper for code that needs to explicitly promote types. + * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex, Literal is defined as \c U. + * Of course, this type must be fully compatible with \a T. 
In doubt, just use \a T here. * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what * this means, just use \a T here. * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex @@ -84,6 +86,7 @@ template struct GenericNumTraits T >::type NonInteger; typedef T Nested; + typedef T Literal; EIGEN_DEVICE_FUNC static inline Real epsilon() @@ -145,6 +148,7 @@ template struct NumTraits > : GenericNumTraits > { typedef _Real Real; + typedef typename NumTraits<_Real>::Literal Literal; enum { IsComplex = 1, RequireInitialization = NumTraits<_Real>::RequireInitialization, @@ -168,6 +172,7 @@ struct NumTraits > typedef typename NumTraits::NonInteger NonIntegerScalar; typedef Array NonInteger; typedef ArrayType & Nested; + typedef typename NumTraits::Literal Literal; enum { IsComplex = NumTraits::IsComplex, diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 87cc44657..6de21d2bb 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -906,35 +906,21 @@ namespace Eigen { const typename internal::plain_constant_type::type, const EXPR> #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \ - EIGEN_DEVICE_FUNC inline \ - const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,OPNAME) \ - (METHOD)(const Scalar& scalar) const { \ - return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,OPNAME)(derived(), \ - typename internal::plain_constant_type::type(derived().rows(), derived().cols(), scalar)); \ - } \ - \ template EIGEN_DEVICE_FUNC inline \ - typename internal::enable_if >::Defined, \ - const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME) >::type \ + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg >::Defined>::type,OPNAME) \ (METHOD)(const T& scalar) const { \ - return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME)(derived(), \ - typename 
internal::plain_constant_type::type(derived().rows(), derived().cols(), scalar)); \ + typedef typename internal::promote_scalar_arg >::Defined>::type PromotedT; \ + return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \ + typename internal::plain_constant_type::type(derived().rows(), derived().cols(), internal::scalar_constant_op(scalar))); \ } #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ - EIGEN_DEVICE_FUNC inline friend \ - const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,OPNAME) \ - (METHOD)(const Scalar& scalar, const StorageBaseType& matrix) { \ - return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,OPNAME)( \ - typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), scalar), matrix.derived()); \ - } \ - \ template EIGEN_DEVICE_FUNC inline friend \ - typename internal::enable_if >::Defined, \ - const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME) >::type \ + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg >::Defined>::type,Derived,OPNAME) \ (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ - return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME)( \ - typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), scalar), matrix.derived()); \ + typedef typename internal::promote_scalar_arg >::Defined>::type PromotedT; \ + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \ + typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op(scalar)), matrix.derived()); \ } #define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \ diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index c41c408b0..b372ac1ad 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -45,6 +45,34 @@ inline IndexDest convert_index(const IndexSrc& idx) { } 
+// promote_scalar_arg is an helper used in operation between an expression and a scalar, like: +// expression * scalar +// Its role is to determine how the type T of the scalar operand should be promoted given the scalar type ExprScalar of the given expression. +// The IsSupported template parameter must be provided by the caller as: ScalarBinaryOpTraits::Defined using the proper order for ExprScalar and T. +// Then the logic is as follows: +// - if the operation is natively supported as defined by IsSupported, then the scalar type is not promoted, and T is returned. +// - otherwise, NumTraits::Literal is returned if T is implicitly convertible to NumTraits::Literal AND that this does not imply a float to integer conversion. +// - In all other cases, the promoted type is not defined, and the respective operation is thus invalid and not available (SFINAE). +template::Literal>::value, + bool IsSafe = NumTraits::IsInteger || !NumTraits::Literal>::IsInteger> +struct promote_scalar_arg +{ +}; + +template +struct promote_scalar_arg +{ + typedef T type; +}; + +template +struct promote_scalar_arg +{ + typedef typename NumTraits::Literal type; +}; + //classes inheriting no_assignment_operator don't generate a default operator=. 
class no_assignment_operator { diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index fe8c16470..57ef85c32 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -79,6 +79,11 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_MIX_SCALAR(vf * scf , vf.template cast >() * scf); VERIFY_MIX_SCALAR(scd * vd , scd * vd.template cast >()); + VERIFY_MIX_SCALAR(vcf * 2 , vcf * complex(2)); + VERIFY_MIX_SCALAR(vcf * 2.1 , vcf * complex(2.1)); + VERIFY_MIX_SCALAR(2 * vcf, vcf * complex(2)); + VERIFY_MIX_SCALAR(2.1 * vcf , vcf * complex(2.1)); + // check scalar quotients VERIFY_MIX_SCALAR(vcf / sf , vcf / complex(sf)); VERIFY_MIX_SCALAR(vf / scf , vf.template cast >() / scf); diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp index 2f5025305..a419b0e44 100644 --- a/test/nesting_ops.cpp +++ b/test/nesting_ops.cpp @@ -75,8 +75,8 @@ template void run_nesting_ops_2(const MatrixType& _m) } else { - VERIFY( verify_eval_type<1>(2*m1, 2*m1) ); - VERIFY( verify_eval_type<2>(2*m1, m1) ); + VERIFY( verify_eval_type<2>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<3>(2*m1, m1) ); } VERIFY( verify_eval_type<2>(m1+m1, m1+m1) ); VERIFY( verify_eval_type<3>(m1+m1, m1) ); From bf2d5edecc46808b42baeaf3eaced816b34f3364 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 23 Jun 2016 15:35:17 +0200 Subject: [PATCH 77/86] Fix warning. 
--- Eigen/src/misc/RealSvd2x2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/misc/RealSvd2x2.h b/Eigen/src/misc/RealSvd2x2.h index cdd7777d2..dfaaa0b17 100644 --- a/Eigen/src/misc/RealSvd2x2.h +++ b/Eigen/src/misc/RealSvd2x2.h @@ -51,4 +51,4 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, } // end namespace Eigen -#endif // EIGEN_REALSVD2X2_H \ No newline at end of file +#endif // EIGEN_REALSVD2X2_H From 55fc04e8b5d9f3bf3b82bc4203a1562f0c7637dd Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 23 Jun 2016 15:36:42 +0200 Subject: [PATCH 78/86] Fix operator priority --- Eigen/src/Core/CommaInitializer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index 38b1112ff..787743b8f 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -93,7 +93,7 @@ struct CommaInitializer eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() && "Too many rows passed to comma initializer (operator<<)"); } - eigen_assert(m_col Date: Thu, 23 Jun 2016 18:47:31 +0200 Subject: [PATCH 79/86] bug #1241: does not emmit anything for empty tensors --- unsupported/Eigen/CXX11/src/Tensor/TensorIO.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h index 58ffaefab..f3a3a1b88 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -26,10 +26,7 @@ struct TensorPrinter { typedef typename internal::remove_const::type Scalar; typedef typename Tensor::Index Index; const Index total_size = internal::array_prod(tensor.dimensions()); - if (total_size == 0) { - os << "Empty tensor of rank " << Rank; - } - else { + if (total_size > 0) { const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); static const int layout = Tensor::Layout; 
Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); @@ -46,10 +43,7 @@ struct TensorPrinter { typedef typename internal::remove_const::type Scalar; typedef typename Tensor::Index Index; const Index total_size = internal::array_prod(tensor.dimensions()); - if (total_size == 0) { - os << "Empty tensor of rank 1"; - } - else { + if (total_size > 0) { Map > array(const_cast(tensor.data()), total_size); os << array; } From 361dbd246d0b0f0ceff8d6dea6991807cffde821 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 23 Jun 2016 18:54:30 +0200 Subject: [PATCH 80/86] Add unit test for printing empty tensors --- unsupported/test/cxx11_tensor_io.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/unsupported/test/cxx11_tensor_io.cpp b/unsupported/test/cxx11_tensor_io.cpp index 8267dcadd..489960529 100644 --- a/unsupported/test/cxx11_tensor_io.cpp +++ b/unsupported/test/cxx11_tensor_io.cpp @@ -40,6 +40,12 @@ static void test_output_1d() std::string expected("0\n1\n2\n3\n4"); VERIFY_IS_EQUAL(std::string(os.str()), expected); + + Eigen::Tensor empty_tensor(0); + std::stringstream empty_os; + empty_os << empty_tensor; + std::string empty_string; + VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string); } From a9c1e4d7b7ce7c9dc5310cee1ed13fdef08e506e Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 23 Jun 2016 16:40:07 -0700 Subject: [PATCH 81/86] Return -1 from CurrentThreadId when called by thread outside the pool. 
--- .../Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 2 ++ .../Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 10 +++++----- .../Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h | 9 +++++---- .../Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h | 2 +- unsupported/test/cxx11_non_blocking_thread_pool.cpp | 2 +- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 0af91fe64..34270730b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -172,6 +172,8 @@ struct ThreadPoolDevice { pool_->Schedule(func); } + // Returns a logical thread index between 0 and pool_->NumThreads() - 1 if + // called from one of the threads in pool_. Returns -1 otherwise. EIGEN_STRONG_INLINE int currentThreadId() const { return pool_->CurrentThreadId(); } diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index 1369ca183..33ae45131 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -99,13 +99,13 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { return static_cast(threads_.size()); } - int CurrentThreadId() const { + int CurrentThreadId() const final { const PerThread* pt = const_cast(this)->GetPerThread(); if (pt->pool == this) { return pt->thread_id; } else { - return NumThreads(); + return -1; } } @@ -113,10 +113,10 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { typedef typename Environment::EnvThread Thread; struct PerThread { - constexpr PerThread() : pool(NULL), index(-1), rand(0) { } + constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) { } NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads. 
- int thread_id; // Worker thread index in pool. - uint64_t rand; // Random generator state. + uint64_t rand; // Random generator state. + int thread_id; // Worker thread index in pool. }; Environment env_; diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h index 36eb6950f..e75d0f467 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h @@ -78,7 +78,7 @@ class SimpleThreadPoolTempl : public ThreadPoolInterface { if (pt->pool == this) { return pt->thread_id; } else { - return NumThreads(); + return -1; } } @@ -128,8 +128,9 @@ class SimpleThreadPoolTempl : public ThreadPoolInterface { }; struct PerThread { - ThreadPoolTempl* pool; // Parent pool, or null for normal threads. - int thread_id; // Worker thread index in pool. + constexpr PerThread() : pool(NULL), thread_id(-1) { } + SimpleThreadPoolTempl* pool; // Parent pool, or null for normal threads. + int thread_id; // Worker thread index in pool. }; Environment env_; @@ -141,7 +142,7 @@ class SimpleThreadPoolTempl : public ThreadPoolInterface { bool exiting_ = false; PerThread* GetPerThread() const { - static EIGEN_THREAD_LOCAL PerThread per_thread; + EIGEN_THREAD_LOCAL PerThread per_thread; return &per_thread; } }; diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h index 569cd4bc8..a65ee97c9 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h @@ -22,7 +22,7 @@ class ThreadPoolInterface { virtual int NumThreads() const = 0; // Returns a logical thread index between 0 and NumThreads() - 1 if called - // from one of the threads in the pool. Returns NumThreads() otherwise. + // from one of the threads in the pool. Returns -1 otherwise. 
virtual int CurrentThreadId() const = 0; virtual ~ThreadPoolInterface() {} diff --git a/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/unsupported/test/cxx11_non_blocking_thread_pool.cpp index 6e4e5cbab..5f9bb938b 100644 --- a/unsupported/test/cxx11_non_blocking_thread_pool.cpp +++ b/unsupported/test/cxx11_non_blocking_thread_pool.cpp @@ -28,7 +28,7 @@ static void test_parallelism() const int kThreads = 16; // code below expects that this is a multiple of 4 NonBlockingThreadPool tp(kThreads); VERIFY_IS_EQUAL(tp.NumThreads(), kThreads); - VERIFY_IS_EQUAL(tp.CurrentThreadId(), kThreads); + VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1); for (int iter = 0; iter < 100; ++iter) { std::atomic running(0); std::atomic done(0); From deb45ad4bc5542b7c66cfb3b465ac106294c3ddd Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 24 Jun 2016 09:52:25 +0200 Subject: [PATCH 82/86] bug #1245: fix compilation with msvc --- Eigen/src/Core/util/ForwardDeclarations.h | 1 - Eigen/src/Core/util/Meta.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 830f20f90..1c90c0e2b 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -131,7 +131,6 @@ template class ArrayWrapper; template class MatrixWrapper; template class SolverBase; template class InnerIterator; -template struct ScalarBinaryOpTraits; namespace internal { template struct kernel_retval_base; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index a4a491ff8..02b3d961a 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -439,7 +439,7 @@ T div_ceil(const T &a, const T &b) * * \sa CwiseBinaryOp */ -template +template struct ScalarBinaryOpTraits #ifndef EIGEN_PARSED_BY_DOXYGEN // for backward compatibility, use the hints given by the (deprecated) internal::scalar_product_traits class. 
From cd577a275c3420d743a044e37484ab8fd1e17e37 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 24 Jun 2016 11:28:54 +0200 Subject: [PATCH 83/86] Relax promote_scalar_arg logic to enable promotion to Expr::Scalar if conversion to Expr::Literal fails. This is useful to cancel expression template at the scalar level, e.g. with AutoDiff>. This patch also defers calls to NumTraits in cases for which types are not directly compatible. --- Eigen/src/Core/util/XprHelper.h | 46 ++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index b372ac1ad..3e8048d27 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -51,28 +51,50 @@ inline IndexDest convert_index(const IndexSrc& idx) { // The IsSupported template parameter must be provided by the caller as: ScalarBinaryOpTraits::Defined using the proper order for ExprScalar and T. // Then the logic is as follows: // - if the operation is natively supported as defined by IsSupported, then the scalar type is not promoted, and T is returned. -// - otherwise, NumTraits::Literal is returned if T is implicitly convertible to NumTraits::Literal AND that this does not imply a float to integer conversion. +// - otherwise, NumTraits::Literal is returned if T is implicitly convertible to NumTraits::Literal AND that this does not imply a float to integer conversion. +// - otherwise, ExprScalar is returned if T is implicitly convertible to ExprScalar AND that this does not imply a float to integer conversion. // - In all other cases, the promoted type is not defined, and the respective operation is thus invalid and not available (SFINAE). 
-template::Literal>::value, - bool IsSafe = NumTraits::IsInteger || !NumTraits::Literal>::IsInteger> -struct promote_scalar_arg -{ -}; +template +struct promote_scalar_arg; -template -struct promote_scalar_arg +template +struct promote_scalar_arg { typedef T type; }; +// Recursively check safe conversion to PromotedType, and then ExprScalar if they are different. +template::value, + bool IsSafe = NumTraits::IsInteger || !NumTraits::IsInteger> +struct promote_scalar_arg_unsupported; + +// Start recursion with NumTraits::Literal template -struct promote_scalar_arg +struct promote_scalar_arg : promote_scalar_arg_unsupported::Literal> {}; + +// We found a match! +template +struct promote_scalar_arg_unsupported { - typedef typename NumTraits::Literal type; + typedef PromotedType type; }; +// No match, but no real-to-integer issues, and ExprScalar and current PromotedType are different, +// so let's try to promote to ExprScalar +template +struct promote_scalar_arg_unsupported + : promote_scalar_arg_unsupported +{}; + +// Unsafe real-to-integer, let's stop. +template +struct promote_scalar_arg_unsupported {}; + +// T is not even convertible to ExprScalar, let's stop. +template +struct promote_scalar_arg_unsupported {}; + //classes inheriting no_assignment_operator don't generate a default operator=. class no_assignment_operator { From fa39f81b48b4fb20410e788e39459ed448dc461d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 24 Jun 2016 11:33:30 +0200 Subject: [PATCH 84/86] Fix instantiation of ScalarBinaryOpTraits for AutoDiff. 
--- .../Eigen/src/AutoDiff/AutoDiffScalar.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index feaeeaf5a..0abd12210 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -67,7 +67,7 @@ template class AutoDiffScalar : public internal::auto_diff_special_op <_DerType, !internal::is_same::type>::Scalar, - typename NumTraits::type>::Scalar>::Real>::value> + typename NumTraits::type>::Scalar>::Real>::value> { public: typedef internal::auto_diff_special_op @@ -497,29 +497,29 @@ struct make_coherent_impl -struct ScalarBinaryOpTraits,A_Scalar> +template +struct ScalarBinaryOpTraits,A_Scalar,BinOp> { enum { Defined = 1 }; typedef Matrix ReturnType; }; -template -struct ScalarBinaryOpTraits > +template +struct ScalarBinaryOpTraits, BinOp> { enum { Defined = 1 }; typedef Matrix ReturnType; }; -template -struct ScalarBinaryOpTraits,typename DerType::Scalar> +template +struct ScalarBinaryOpTraits,typename DerType::Scalar,BinOp> { enum { Defined = 1 }; typedef AutoDiffScalar ReturnType; }; -template -struct ScalarBinaryOpTraits > +template +struct ScalarBinaryOpTraits, BinOp> { enum { Defined = 1 }; typedef AutoDiffScalar ReturnType; From ce90647fa58cd81c3561170947e2a507571b2e43 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 24 Jun 2016 11:34:02 +0200 Subject: [PATCH 85/86] Fix NumTraits --- .../Eigen/src/AutoDiff/AutoDiffScalar.h | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 0abd12210..8e40569c1 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -525,6 +525,23 @@ struct ScalarBinaryOpTraits, Bi typedef AutoDiffScalar ReturnType; }; + +// The following is an 
attempt to let Eigen know about expression templates, but that's more tricky! + +// template +// struct ScalarBinaryOpTraits,AutoDiffScalar, BinOp> +// { +// enum { Defined = 1 }; +// typedef AutoDiffScalar ReturnType; +// }; +// +// template +// struct ScalarBinaryOpTraits,AutoDiffScalar, BinOp> +// { +// enum { Defined = 1 };//internal::is_same::value }; +// typedef AutoDiffScalar ReturnType; +// }; + #define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \ template \ inline const Eigen::AutoDiffScalar< \ @@ -645,9 +662,10 @@ template struct NumTraits > : NumTraits< typename NumTraits::Real > { typedef AutoDiffScalar::Real,DerType::RowsAtCompileTime,DerType::ColsAtCompileTime, - DerType::Options, DerType::MaxRowsAtCompileTime, DerType::MaxColsAtCompileTime> > Real; + 0, DerType::MaxRowsAtCompileTime, DerType::MaxColsAtCompileTime> > Real; typedef AutoDiffScalar NonInteger; typedef AutoDiffScalar Nested; + typedef typename NumTraits::Literal Literal; enum{ RequireInitialization = 1 }; From 6dd9077070d8c9f09101b0b050ab32a7c660c3a0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 24 Jun 2016 11:34:21 +0200 Subject: [PATCH 86/86] Fix some unused typedef warnings. 
--- unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 8e40569c1..ee7a92274 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -548,7 +548,7 @@ struct ScalarBinaryOpTraits, Bi EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename Eigen::internal::remove_all::type, typename Eigen::internal::traits::type>::Scalar, product) > \ FUNC(const Eigen::AutoDiffScalar& x) { \ using namespace Eigen; \ - typedef typename Eigen::internal::traits::type>::Scalar Scalar; \ + EIGEN_UNUSED typedef typename Eigen::internal::traits::type>::Scalar Scalar; \ CODE; \ } @@ -617,8 +617,6 @@ EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::t pow(const Eigen::AutoDiffScalar &x, const typename internal::traits::type>::Scalar &y) { using namespace Eigen; - typedef typename internal::remove_all::type DerTypeCleaned; - typedef typename Eigen::internal::traits::Scalar Scalar; return Eigen::MakeAutoDiffScalar(std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1))); }