From fe630c98739186c1dd3faf7a06b52a64baa5ea23 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 18 Oct 2015 22:15:01 +0200 Subject: [PATCH 001/344] Improve numerical accuracy in LLT and triangular solve by using true scalar divisions (instead of x * (1/y)) --- Eigen/src/Cholesky/LLT.h | 2 +- Eigen/src/Core/products/TriangularSolverMatrix.h | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index dc73304e8..1f0091f3c 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -285,7 +285,7 @@ template struct llt_inplace return k; mat.coeffRef(k,k) = x = sqrt(x); if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint(); - if (rs>0) A21 *= RealScalar(1)/x; + if (rs>0) A21 /= x; } return -1; } diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index a9a198d64..208593718 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -304,9 +304,12 @@ EIGEN_DONT_INLINE void triangular_solve_matrix Date: Tue, 20 Oct 2015 11:37:44 -0700 Subject: [PATCH 002/344] Fixed a bug in the tensor conversion op --- unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index 4ca978ab4..79d802ea9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -124,8 +124,8 @@ class TensorConversionOp : public TensorBase::StorageKind StorageKind; typedef typename internal::traits::Index Index; typedef typename internal::nested::type Nested; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; + typedef Scalar CoeffReturnType; + typedef Packet PacketReturnType; 
typedef typename NumTraits::Real RealScalar; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) @@ -164,6 +164,8 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device) { + + EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } From eaf4b98180d7606abba69133e39e23537ced79e5 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 20 Oct 2015 11:41:22 -0700 Subject: [PATCH 003/344] Added support for boolean reductions (ie 'and' & 'or' reductions) --- unsupported/Eigen/CXX11/src/Tensor/README.md | 13 +++++++++ .../Eigen/CXX11/src/Tensor/TensorBase.h | 26 ++++++++++++++++++ .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 27 +++++++++++++++++++ unsupported/test/cxx11_tensor_reduction.cpp | 17 ++++++++++++ 4 files changed, 83 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/README.md b/unsupported/Eigen/CXX11/src/Tensor/README.md index 87e57cebb..407485090 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/README.md +++ b/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -1149,6 +1149,19 @@ are the smallest of the reduced values. Reduce a tensor using the prod() operator. The resulting values are the product of the reduced values. +### <Operation> all(const Dimensions& new_dims) +### <Operation> all() +Reduce a tensor using the all() operator. Casts tensor to bool and then checks +whether all elements are true. Runs through all elements rather than +short-circuiting, so may be significantly inefficient. + +### <Operation> any(const Dimensions& new_dims) +### <Operation> any() +Reduce a tensor using the any() operator. Casts tensor to bool and then checks +whether any element is true. Runs through all elements rather than +short-circuiting, so may be significantly inefficient. 
+ + ### <Operation> reduce(const Dimensions& new_dims, const Reducer& reducer) Reduce a tensor using a user-defined reduction operator. See ```SumReducer``` diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 477e4a174..c00f67950 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -363,6 +363,32 @@ class TensorBase return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MinReducer()); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp > + all(const Dims& dims) const { + return cast().reduce(dims, internal::AndReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const TensorConversionOp > + all() const { + DimensionList in_dims; + return cast().reduce(in_dims, internal::AndReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp > + any(const Dims& dims) const { + return cast().reduce(dims, internal::OrReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const TensorConversionOp > + any() const { + DimensionList in_dims; + return cast().reduce(in_dims, internal::OrReducer()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorTupleReducerOp< internal::ArgMaxTupleReducer >, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index ed259399b..a98c6a2e3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -219,6 +219,33 @@ template struct ProdReducer }; +struct AndReducer +{ + static const bool PacketAccess = false; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { + *accum = *accum && t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { + return true; + } + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE bool finalize(bool accum) const { + return accum; + } +}; + +struct OrReducer { + static const bool PacketAccess = false; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { + *accum = *accum || t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { + return false; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { + return accum; + } +}; + // Argmin/Argmax reducers template struct ArgMaxTupleReducer { diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp index b2c85a879..e8180c061 100644 --- a/unsupported/test/cxx11_tensor_reduction.cpp +++ b/unsupported/test/cxx11_tensor_reduction.cpp @@ -180,6 +180,23 @@ static void test_simple_reductions() { VERIFY_IS_APPROX(mean1(0), mean2(0)); } + + { + Tensor ints(10); + std::iota(ints.data(), ints.data() + ints.dimension(0), 0); + + TensorFixedSize > all; + all = ints.all(); + VERIFY(!all(0)); + all = (ints >= ints.constant(0)).all(); + VERIFY(all(0)); + + TensorFixedSize > any; + any = (ints > ints.constant(10)).any(); + VERIFY(!any(0)); + any = (ints < ints.constant(1)).any(); + VERIFY(any(0)); + } } template From 73b8e719ae28fe0a4b22cc9720fa46ce601541f9 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 20 Oct 2015 11:42:34 -0700 Subject: [PATCH 004/344] Removed bogus assertion --- unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index 79d802ea9..e9d3437b7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -164,8 +164,6 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device) { - - EIGEN_STATIC_ASSERT((internal::is_same::value), 
YOU_MADE_A_PROGRAMMING_MISTAKE); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } From 0af63493fdb12cea2fdfbabfbddbd4db44587047 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 20 Oct 2015 11:53:30 -0700 Subject: [PATCH 005/344] Disable SFINAE for versions of gcc older than 4.8 --- unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h index 6d9cc4f38..939de5f11 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h @@ -26,8 +26,17 @@ * void foo(){} */ +// SFINAE requires variadic templates #ifdef EIGEN_HAS_VARIADIC_TEMPLATES -#define EIGEN_HAS_SFINAE + // SFINAE doesn't work for gcc <= 4.7 + #ifdef EIGEN_COMP_GNUC + #if EIGEN_GNUC_AT_LEAST(4,8) + #define EIGEN_HAS_SFINAE + #endif + #else + #define EIGEN_HAS_SFINAE + #endif + #endif #define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \ From 896126588997f89d647ec857a4dd832e462a013b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 21 Oct 2015 09:47:43 +0200 Subject: [PATCH 006/344] bug #1064: add support for Ref --- Eigen/src/SparseCore/SparseRef.h | 147 +++++++++++++++++++++++++++- Eigen/src/SparseCore/SparseVector.h | 3 + test/sparse_ref.cpp | 20 +++- 3 files changed, 165 insertions(+), 5 deletions(-) diff --git a/Eigen/src/SparseCore/SparseRef.h b/Eigen/src/SparseCore/SparseRef.h index e10bf6878..f9735fd1c 100644 --- a/Eigen/src/SparseCore/SparseRef.h +++ b/Eigen/src/SparseCore/SparseRef.h @@ -19,7 +19,7 @@ enum { namespace internal { template class SparseRefBase; - + template struct traits, _Options, _StrideType> > : public traits > @@ -27,7 +27,7 @@ struct traits, _Options, _Stride typedef SparseMatrix PlainObjectType; enum { Options = _Options, - Flags = traits >::Flags | CompressedAccessBit | 
NestByRefBit + Flags = traits::Flags | CompressedAccessBit | NestByRefBit }; template struct match { @@ -48,7 +48,35 @@ struct traits, _Options, _ Flags = (traits >::Flags | CompressedAccessBit | NestByRefBit) & ~LvalueBit }; }; - + +template +struct traits, _Options, _StrideType> > + : public traits > +{ + typedef SparseVector PlainObjectType; + enum { + Options = _Options, + Flags = traits::Flags | CompressedAccessBit | NestByRefBit + }; + + template struct match { + enum { + MatchAtCompileTime = (Derived::Flags&CompressedAccessBit) && Derived::IsVectorAtCompileTime + }; + typedef typename internal::conditional::type type; + }; + +}; + +template +struct traits, _Options, _StrideType> > + : public traits, _Options, _StrideType> > +{ + enum { + Flags = (traits >::Flags | CompressedAccessBit | NestByRefBit) & ~LvalueBit + }; +}; + template struct traits > : public traits {}; @@ -195,6 +223,99 @@ class Ref, Options, StrideType }; + +/** + * \ingroup Sparse_Module + * + * \brief A sparse vector expression referencing an existing sparse vector expression + * + * \tparam PlainObjectType the equivalent sparse matrix type of the referenced data + * \tparam Options Not used for SparseVector. 
+ * \tparam StrideType Only used for dense Ref + * + * \sa class Ref + */ +template +class Ref, Options, StrideType > + : public internal::SparseRefBase, Options, StrideType > > +{ + typedef SparseVector PlainObjectType; + typedef internal::traits Traits; + template + inline Ref(const SparseVector& expr); + public: + + typedef internal::SparseRefBase Base; + EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + inline Ref(SparseVector& expr) + { + EIGEN_STATIC_ASSERT(bool(Traits::template match >::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.derived()); + } + + template + inline Ref(const SparseCompressedBase& expr) + #else + template + inline Ref(SparseCompressedBase& expr) + #endif + { + EIGEN_STATIC_ASSERT(bool(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.const_cast_derived()); + } +}; + +// this is the const ref version +template +class Ref, Options, StrideType> + : public internal::SparseRefBase, Options, StrideType> > +{ + typedef SparseVector TPlainObjectType; + typedef internal::traits Traits; + public: + + typedef internal::SparseRefBase Base; + EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) + + template + inline Ref(const SparseMatrixBase& expr) + { + construct(expr.derived(), typename Traits::template match::type()); + } + + inline Ref(const Ref& other) : Base(other) { + // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy + } + + template + inline Ref(const RefBase& other) { + construct(other.derived(), typename Traits::template match::type()); + } + + protected: + + template + void construct(const Expression& expr,internal::true_type) + { + Base::construct(expr); + } + + template + void construct(const Expression& expr, internal::false_type) + { + TPlainObjectType* obj = reinterpret_cast(m_object_bytes); + ::new 
(obj) TPlainObjectType(expr); + Base::construct(*obj); + } + + protected: + char m_object_bytes[sizeof(TPlainObjectType)]; +}; + namespace internal { template @@ -217,6 +338,26 @@ struct evaluator, Options, explicit evaluator(const XprType &mat) : Base(mat) {} }; +template +struct evaluator, Options, StrideType> > + : evaluator, Options, StrideType> > > +{ + typedef evaluator, Options, StrideType> > > Base; + typedef Ref, Options, StrideType> XprType; + evaluator() : Base() {} + explicit evaluator(const XprType &mat) : Base(mat) {} +}; + +template +struct evaluator, Options, StrideType> > + : evaluator, Options, StrideType> > > +{ + typedef evaluator, Options, StrideType> > > Base; + typedef Ref, Options, StrideType> XprType; + evaluator() : Base() {} + explicit evaluator(const XprType &mat) : Base(mat) {} +}; + } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index f941fa5e1..94f8d0341 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -235,6 +235,9 @@ class SparseVector inline SparseVector(const SparseMatrixBase& other) : m_size(0) { + #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + #endif check_template_parameters(); *this = other.derived(); } diff --git a/test/sparse_ref.cpp b/test/sparse_ref.cpp index d2d475616..f4aefbb48 100644 --- a/test/sparse_ref.cpp +++ b/test/sparse_ref.cpp @@ -53,10 +53,14 @@ EIGEN_DONT_INLINE void call_ref_3(const Ref, StandardC VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } +template +EIGEN_DONT_INLINE void call_ref_4(Ref > a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } + +template +EIGEN_DONT_INLINE void call_ref_5(const Ref >& a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } + void call_ref() { -// SparseVector > ca = VectorXcf::Random(10).sparseView(); -// SparseVector a = VectorXf::Random(10).sparseView(); SparseMatrix A = MatrixXf::Random(10,10).sparseView(0.5,1); 
SparseMatrix B = MatrixXf::Random(10,10).sparseView(0.5,1); SparseMatrix C = MatrixXf::Random(10,10).sparseView(0.5,1); @@ -111,6 +115,15 @@ void call_ref() VERIFY_EVALUATION_COUNT( call_ref_2(vr, vr.transpose()), 0); VERIFY_EVALUATION_COUNT( call_ref_2(A.block(1,1,3,3), A.block(1,1,3,3)), 1); // should be 0 (allocate starts/nnz only) + + VERIFY_EVALUATION_COUNT( call_ref_4(vc, vc), 0); + VERIFY_EVALUATION_COUNT( call_ref_4(vr, vr.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_5(vc, vc), 0); + VERIFY_EVALUATION_COUNT( call_ref_5(vr, vr.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_4(A.col(2), A.col(2)), 0); + VERIFY_EVALUATION_COUNT( call_ref_5(A.col(2), A.col(2)), 0); + // VERIFY_EVALUATION_COUNT( call_ref_4(A.row(2), A.row(2).transpose()), 1); // does not compile on purpose + VERIFY_EVALUATION_COUNT( call_ref_5(A.row(2), A.row(2).transpose()), 1); } void test_sparse_ref() @@ -119,5 +132,8 @@ void test_sparse_ref() CALL_SUBTEST_1( check_const_correctness(SparseMatrix()) ); CALL_SUBTEST_1( check_const_correctness(SparseMatrix()) ); CALL_SUBTEST_2( call_ref() ); + + CALL_SUBTEST_3( check_const_correctness(SparseVector()) ); + CALL_SUBTEST_3( check_const_correctness(SparseVector()) ); } } From 8afd0ce9552e64001012f20c89b6f56daf120896 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 21 Oct 2015 13:48:15 +0200 Subject: [PATCH 007/344] add FIXME --- Eigen/src/SparseCore/SparseRef.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Eigen/src/SparseCore/SparseRef.h b/Eigen/src/SparseCore/SparseRef.h index f9735fd1c..19e06fc80 100644 --- a/Eigen/src/SparseCore/SparseRef.h +++ b/Eigen/src/SparseCore/SparseRef.h @@ -318,6 +318,8 @@ class Ref, Options, StrideType namespace internal { +// FIXME shall we introduce a general evaluatior_ref that we can specialize for any sparse object once, and thus remove this copy-pasta thing... 
+ template struct evaluator, Options, StrideType> > : evaluator, Options, StrideType> > > From b178cc347968675bdae942dbdcb7de9ed9daa564 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 21 Oct 2015 11:28:28 -0700 Subject: [PATCH 008/344] Added some syntactic sugar to make it simpler to compare a tensor to a scalar. --- .../Eigen/CXX11/src/Tensor/TensorBase.h | 32 ++++++++++++++++ unsupported/test/CMakeLists.txt | 1 + unsupported/test/cxx11_tensor_sugar.cpp | 38 +++++++++++++++++++ 3 files changed, 71 insertions(+) create mode 100644 unsupported/test/cxx11_tensor_sugar.cpp diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index c00f67950..1b85f5ef5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -279,6 +279,38 @@ class TensorBase return binaryExpr(other.derived(), std::not_equal_to()); } + // comparisons and tests for Scalars + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator<(Scalar threshold) const { + return operator<(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator<=(Scalar threshold) const { + return operator<=(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator>(Scalar threshold) const { + return operator>(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator>=(Scalar threshold) const { + return operator>=(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator==(Scalar threshold) const { + 
return operator==(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator!=(Scalar threshold) const { + return operator!=(constant(threshold)); + } + // Coefficient-wise ternary operators. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorSelectOp diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 8865892e6..5a9ed5730 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -143,6 +143,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_generator "-std=c++0x") ei_add_test(cxx11_tensor_custom_op "-std=c++0x") ei_add_test(cxx11_tensor_custom_index "-std=c++0x") + ei_add_test(cxx11_tensor_sugar "-std=c++0x") # These tests needs nvcc # ei_add_test(cxx11_tensor_device "-std=c++0x") diff --git a/unsupported/test/cxx11_tensor_sugar.cpp b/unsupported/test/cxx11_tensor_sugar.cpp new file mode 100644 index 000000000..7848acc8b --- /dev/null +++ b/unsupported/test/cxx11_tensor_sugar.cpp @@ -0,0 +1,38 @@ +#include "main.h" + +#include + +using Eigen::Tensor; +using Eigen::RowMajor; + +static void test_comparison_sugar() { + // we already trust comparisons between tensors, we're simply checking that + // the sugared versions are doing the same thing + Tensor t(6, 7, 5); + + t.setRandom(); + // make sure we have at least one value == 0 + t(0,0,0) = 0; + + Tensor b; + +#define TEST_TENSOR_EQUAL(e1, e2) \ + b = ((e1) == (e2)).all(); \ + VERIFY(b(0)) + +#define TEST_OP(op) TEST_TENSOR_EQUAL(t op 0, t op t.constant(0)) + + TEST_OP(==); + TEST_OP(!=); + TEST_OP(<=); + TEST_OP(>=); + TEST_OP(<); + TEST_OP(>); +#undef TEST_OP +#undef TEST_TENSOR_EQUAL +} + +void test_cxx11_tensor_sugar() +{ + CALL_SUBTEST(test_comparison_sugar()); +} From e78bc111f1dc0a7af2360b836c94c33d67e55fc5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 21 Oct 2015 20:58:33 +0200 Subject: [PATCH 009/344] bug #1090: fix a 
shortcoming in redux logic for which slice-vectorization plus unrolling might happen. --- Eigen/src/Core/Redux.h | 5 +++-- test/redux.cpp | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 87b4a9c46..309898b36 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -269,8 +269,9 @@ struct redux_impl } }; -template -struct redux_impl +// NOTE: for SliceVectorizedTraversal we simply bypass unrolling +template +struct redux_impl { typedef typename Derived::Scalar Scalar; typedef typename packet_traits::type PacketType; diff --git a/test/redux.cpp b/test/redux.cpp index 9b0767c73..849faf55e 100644 --- a/test/redux.cpp +++ b/test/redux.cpp @@ -56,6 +56,14 @@ template void matrixRedux(const MatrixType& m) VERIFY_IS_APPROX(m1_for_prod.block(r0,c0,r1,c1).prod(), m1_for_prod.block(r0,c0,r1,c1).eval().prod()); VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().minCoeff(), m1.block(r0,c0,r1,c1).real().eval().minCoeff()); VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().maxCoeff(), m1.block(r0,c0,r1,c1).real().eval().maxCoeff()); + + // regression for bug 1090 + const int R1 = MatrixType::RowsAtCompileTime>=2 ? MatrixType::RowsAtCompileTime/2 : 6; + const int C1 = MatrixType::ColsAtCompileTime>=2 ? MatrixType::ColsAtCompileTime/2 : 6; + if(R1<=rows-r0 && C1<=cols-c0) + { + VERIFY_IS_APPROX( (m1.template block(r0,c0).sum()), m1.block(r0,c0,R1,C1).sum() ); + } // test empty objects VERIFY_IS_APPROX(m1.block(r0,c0,0,0).sum(), Scalar(0)); From 6df8e99470d1ecdd89f451c1bd366672d5a27b6b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 22 Oct 2015 16:10:28 +0200 Subject: [PATCH 010/344] bug #1089: add a warning when using a MatrixBase method which is implemented within another module by declaring them inline. 
--- Eigen/src/Core/MatrixBase.h | 51 ++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 7c66572d1..b5afff005 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -328,23 +328,26 @@ template class MatrixBase /////////// LU module /////////// - EIGEN_DEVICE_FUNC const FullPivLU fullPivLu() const; - EIGEN_DEVICE_FUNC const PartialPivLU partialPivLu() const; - - const PartialPivLU lu() const; + EIGEN_DEVICE_FUNC + inline const FullPivLU fullPivLu() const; + EIGEN_DEVICE_FUNC + inline const PartialPivLU partialPivLu() const; EIGEN_DEVICE_FUNC - const Inverse inverse() const; + inline const PartialPivLU lu() const; + + EIGEN_DEVICE_FUNC + inline const Inverse inverse() const; template - void computeInverseAndDetWithCheck( + inline void computeInverseAndDetWithCheck( ResultType& inverse, typename ResultType::Scalar& determinant, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() ) const; template - void computeInverseWithCheck( + inline void computeInverseWithCheck( ResultType& inverse, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() @@ -353,22 +356,24 @@ template class MatrixBase /////////// Cholesky module /////////// - const LLT llt() const; - const LDLT ldlt() const; + inline const LLT llt() const; + inline const LDLT ldlt() const; /////////// QR module /////////// - const HouseholderQR householderQr() const; - const ColPivHouseholderQR colPivHouseholderQr() const; - const FullPivHouseholderQR fullPivHouseholderQr() const; + inline const HouseholderQR householderQr() const; + inline const ColPivHouseholderQR colPivHouseholderQr() const; + inline const FullPivHouseholderQR fullPivHouseholderQr() const; - EigenvaluesReturnType eigenvalues() const; - RealScalar operatorNorm() const; +/////////// Eigenvalues module /////////// + + inline 
EigenvaluesReturnType eigenvalues() const; + inline RealScalar operatorNorm() const; /////////// SVD module /////////// - JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; - BDCSVD bdcSvd(unsigned int computationOptions = 0) const; + inline JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; + inline BDCSVD bdcSvd(unsigned int computationOptions = 0) const; /////////// Geometry module /////////// @@ -381,24 +386,24 @@ template class MatrixBase #endif // EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC - typename cross_product_return_type::type + inline typename cross_product_return_type::type cross(const MatrixBase& other) const; template EIGEN_DEVICE_FUNC - PlainObject cross3(const MatrixBase& other) const; + inline PlainObject cross3(const MatrixBase& other) const; EIGEN_DEVICE_FUNC - PlainObject unitOrthogonal(void) const; + inline PlainObject unitOrthogonal(void) const; - Matrix eulerAngles(Index a0, Index a1, Index a2) const; + inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; - ScalarMultipleReturnType operator*(const UniformScaling& s) const; + inline ScalarMultipleReturnType operator*(const UniformScaling& s) const; // put this as separate enum value to work around possible GCC 4.3 bug (?) enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) : ColsAtCompileTime==1 ? Vertical : Horizontal }; typedef Homogeneous HomogeneousReturnType; - HomogeneousReturnType homogeneous() const; + inline HomogeneousReturnType homogeneous() const; enum { SizeMinusOne = SizeAtCompileTime==Dynamic ? 
Dynamic : SizeAtCompileTime-1 @@ -409,7 +414,7 @@ template class MatrixBase typedef CwiseUnaryOp::Scalar>, const ConstStartMinusOne > HNormalizedReturnType; - const HNormalizedReturnType hnormalized() const; + inline const HNormalizedReturnType hnormalized() const; ////////// Householder module /////////// From 0eb46508e2e653218557e0ba9858926be9da04e9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 22 Oct 2015 16:30:28 +0200 Subject: [PATCH 011/344] Avoid any openmp calls if multi-threading is explicitely disabled at runtime. --- Eigen/src/Core/products/Parallelizer.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index 91d37a123..e0bfcc356 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -102,21 +102,17 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos // - we are not already in a parallel code // - the sizes are large enough - // 1- are we already in a parallel session? - // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp? - if((!Condition) || (omp_get_num_threads()>1)) - return func(0,rows, 0,cols); - - Index size = transpose ? rows : cols; - - // 2- compute the maximal number of threads from the size of the product: + // compute the maximal number of threads from the size of the product: // FIXME this has to be fine tuned - Index max_threads = std::max(1,size / 32); + Index size = transpose ? 
rows : cols; + Index pb_max_threads = std::max(1,size / 32); + // compute the number of threads we are going to use + Index threads = std::min(nbThreads(), pb_max_threads); - // 3 - compute the number of threads we are going to use - Index threads = std::min(nbThreads(), max_threads); - - if(threads==1) + // if multi-threading is explicitely disabled, not useful, or if we already are in a parallel session, + // then abort multi-threading + // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp? + if((!Condition) || (threads==1) || (omp_get_num_threads()>1)) return func(0,rows, 0,cols); Eigen::initParallel(); From 4cf7da63de0987dc8b49e5801f0cb79eb7fa6dbb Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 22 Oct 2015 11:48:02 -0700 Subject: [PATCH 012/344] Added a constructor to simplify the construction of tensormap from tensor --- .../Eigen/CXX11/src/Tensor/TensorMap.h | 8 +- unsupported/test/cxx11_tensor_map.cpp | 104 ++++++++++++++++++ 2 files changed, 110 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 2cb2bc7a6..55c289810 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -82,15 +82,19 @@ template class TensorMap : public Tensor } #endif - inline TensorMap(PointerArgType dataPtr, const array& dimensions) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const array& dimensions) : m_data(dataPtr), m_dimensions(dimensions) { } template - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions) : m_data(dataPtr), m_dimensions(dimensions) { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PlainObjectType& tensor) + : m_data(tensor.data()), m_dimensions(tensor.dimensions()) + { } + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Index rank() const { return m_dimensions.rank(); } EIGEN_DEVICE_FUNC diff --git a/unsupported/test/cxx11_tensor_map.cpp b/unsupported/test/cxx11_tensor_map.cpp index 9cf2eb150..9ef935853 100644 --- a/unsupported/test/cxx11_tensor_map.cpp +++ b/unsupported/test/cxx11_tensor_map.cpp @@ -139,9 +139,113 @@ static void test_3d() } +static void test_from_tensor() +{ + Tensor mat1(2,3,7); + Tensor mat2(2,3,7); + + int val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat1(i,j,k) = val; + mat2(i,j,k) = val; + val++; + } + } + } + + TensorMap> mat3(mat1); + TensorMap> mat4(mat2); + + VERIFY_IS_EQUAL(mat3.rank(), 3); + VERIFY_IS_EQUAL(mat3.size(), 2*3*7); + VERIFY_IS_EQUAL(mat3.dimension(0), 2); + VERIFY_IS_EQUAL(mat3.dimension(1), 3); + VERIFY_IS_EQUAL(mat3.dimension(2), 7); + + VERIFY_IS_EQUAL(mat4.rank(), 3); + VERIFY_IS_EQUAL(mat4.size(), 2*3*7); + VERIFY_IS_EQUAL(mat4.dimension(0), 2); + VERIFY_IS_EQUAL(mat4.dimension(1), 3); + VERIFY_IS_EQUAL(mat4.dimension(2), 7); + + val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(mat3(i,j,k), val); + VERIFY_IS_EQUAL(mat4(i,j,k), val); + val++; + } + } + } + + TensorFixedSize> mat5; + + val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat5(i,j,k) = val; + val++; + } + } + } + + TensorMap>> mat6(mat5); + + VERIFY_IS_EQUAL(mat6.rank(), 3); + VERIFY_IS_EQUAL(mat6.size(), 2*3*7); + VERIFY_IS_EQUAL(mat6.dimension(0), 2); + VERIFY_IS_EQUAL(mat6.dimension(1), 3); + VERIFY_IS_EQUAL(mat6.dimension(2), 7); + + val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(mat6(i,j,k), val); + val++; + } + } + } +} + + +static int f(const TensorMap >& tensor) { + Tensor result = tensor.sum(); + return result(0); +} + +static void test_casting() +{ + Tensor tensor(2,3,7); + + int 
val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + tensor(i,j,k) = val; + val++; + } + } + } + + TensorMap> map(tensor); + int sum1 = f(map); + int sum2 = f(tensor); + + VERIFY_IS_EQUAL(sum1, sum2); + VERIFY_IS_EQUAL(sum1, 41); +} + void test_cxx11_tensor_map() { CALL_SUBTEST(test_1d()); CALL_SUBTEST(test_2d()); CALL_SUBTEST(test_3d()); + + CALL_SUBTEST(test_from_tensor()); + CALL_SUBTEST(test_casting()); } From 825146c8fd21cc31e96cef42a57b0bbf25f266de Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 22 Oct 2015 11:56:00 -0700 Subject: [PATCH 013/344] Fixed incorrect expected value --- unsupported/test/cxx11_tensor_map.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/test/cxx11_tensor_map.cpp b/unsupported/test/cxx11_tensor_map.cpp index 9ef935853..4c4e10df2 100644 --- a/unsupported/test/cxx11_tensor_map.cpp +++ b/unsupported/test/cxx11_tensor_map.cpp @@ -237,7 +237,7 @@ static void test_casting() int sum2 = f(tensor); VERIFY_IS_EQUAL(sum1, sum2); - VERIFY_IS_EQUAL(sum1, 41); + VERIFY_IS_EQUAL(sum1, 861); } void test_cxx11_tensor_map() From 71b473aab1ee71185bd91e51ee0eb93fd047a847 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 22 Oct 2015 21:58:18 +0200 Subject: [PATCH 014/344] Remove invalid typename keyword --- test/sparse_permutations.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/sparse_permutations.cpp b/test/sparse_permutations.cpp index c2e1d84a3..8c257a3fd 100644 --- a/test/sparse_permutations.cpp +++ b/test/sparse_permutations.cpp @@ -228,9 +228,9 @@ void test_sparse_permutations() CALL_SUBTEST_2(( sparse_permutations_all >(s) )); } - VERIFY((internal::is_same,OnTheRight,false,SparseShape>::ReturnType, - typename internal::nested_eval,PermutationMatrix,AliasFreeProduct>,1>::type>::value)); + VERIFY((internal::is_same,OnTheRight,false,SparseShape>::ReturnType, + 
internal::nested_eval,PermutationMatrix,AliasFreeProduct>,1>::type>::value)); - VERIFY((internal::is_same,OnTheLeft,false,SparseShape>::ReturnType, - typename internal::nested_eval,SparseMatrix,AliasFreeProduct>,1>::type>::value)); + VERIFY((internal::is_same,OnTheLeft,false,SparseShape>::ReturnType, + internal::nested_eval,SparseMatrix,AliasFreeProduct>,1>::type>::value)); } From a147c62998dd38d9adf180291783845c43f8a0fa Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 22 Oct 2015 16:51:30 -0700 Subject: [PATCH 015/344] Added support for fourier transforms (code courtesy of thucjw@gmail.com) --- .../Eigen/CXX11/src/Tensor/TensorFFT.h | 598 ++++++++++++++++++ 1 file changed, 598 insertions(+) create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h new file mode 100644 index 000000000..d9b316de1 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -0,0 +1,598 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Jianwei Cui +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H +#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H + +// NVCC fails to compile this code +#if !defined(__CUDACC__) + +namespace Eigen { + +/** \class TensorFFT + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor FFT class. 
+ * + * TODO: + * Vectorize the Cooley Tukey and the Bluestein algorithm + * Add support for multithreaded evaluation + * Improve the performance on GPU + */ + +template struct MakeComplex { + template + EIGEN_DEVICE_FUNC + T operator() (const T& val) const { return val; } +}; + +template <> struct MakeComplex { + template + EIGEN_DEVICE_FUNC + std::complex operator() (const T& val) const { return std::complex(val, 0); } +}; + +template <> struct MakeComplex { + template + EIGEN_DEVICE_FUNC + std::complex operator() (const std::complex& val) const { return val; } +}; + +template struct PartOf { + template T operator() (const T& val) const { return val; } +}; + +template <> struct PartOf { + template T operator() (const std::complex& val) const { return val.real(); } +}; + +template <> struct PartOf { + template T operator() (const std::complex& val) const { return val.imag(); } +}; + +namespace internal { +template +struct traits > : public traits { + typedef traits XprTraits; + typedef typename NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename XprTraits::Scalar InputScalar; + typedef typename conditional::type OutputScalar; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> { + typedef const TensorFFTOp& type; +}; + +template +struct nested, 1, typename eval >::type> { + typedef TensorFFTOp type; +}; + +} // end namespace internal + +template +class TensorFFTOp : public TensorBase, ReadOnlyAccessors> { + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename internal::conditional::type OutputScalar; + 
typedef OutputScalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft) + : m_xpr(expr), m_fft(fft) {} + + EIGEN_DEVICE_FUNC + const FFT& fft() const { return m_fft; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& expression() const { + return m_xpr; + } + + protected: + typename XprType::Nested m_xpr; + const FFT m_fft; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> { + typedef TensorFFTOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename TensorEvaluator::Dimensions InputDimensions; + typedef internal::traits XprTraits; + typedef typename XprTraits::Scalar InputScalar; + typedef typename internal::conditional::type OutputScalar; + typedef OutputScalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + IsAligned = false, + PacketAccess = true, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_data(NULL), m_impl(op.expression(), device), m_fft(op.fft()), m_device(device) { + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + for (int i = 0; i < NumDims; ++i) { + eigen_assert(input_dims[i] > 0); + m_dimensions[i] = input_dims[i]; + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; + } + } else { + 
m_strides[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; + } + } + m_size = m_dimensions.TotalSize(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_dimensions; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) { + m_impl.evalSubExprsIfNeeded(NULL); + if (data) { + evalToBuf(data); + return false; + } else { + m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size); + evalToBuf(m_data); + return true; + } + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + if (m_data) { + m_device.deallocate(m_data); + m_data = NULL; + } + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { + return m_data[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const { + return internal::ploadt(m_data + index); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } + + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) { + const bool write_to_out = internal::is_same::value; + ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); + + for (int i = 0; i < m_size; ++i) { + buf[i] = MakeComplex::value>()(m_impl.coeff(i)); + } + + for (int i = 0; i < m_fft.size(); ++i) { + int dim = m_fft[i]; + eigen_assert(dim >= 0 && dim < NumDims); + Index line_len = m_dimensions[dim]; + eigen_assert(line_len >= 1); + ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); + const bool is_power_of_two = isPowerOfTwo(line_len); + const int good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); + const int log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); + + ComplexScalar* a = is_power_of_two ? 
NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); + ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); + ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); + if (!is_power_of_two) { + ComplexScalar pos_j_base = ComplexScalar(std::cos(M_PI/line_len), std::sin(M_PI/line_len)); + for (int i = 0; i < line_len + 1; ++i) { + pos_j_base_powered[i] = std::pow(pos_j_base, i * i); + } + } + + for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) { + Index base_offset = getBaseOffsetFromIndex(partial_index, dim); + + // get data into line_buf + for (int j = 0; j < line_len; ++j) { + Index offset = getIndexFromOffset(base_offset, dim, j); + line_buf[j] = buf[offset]; + } + + // processs the line + if (is_power_of_two) { + processDataLineCooleyTukey(line_buf, line_len, log_len); + } + else { + processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered); + } + + // write back + for (int j = 0; j < line_len; ++j) { + const ComplexScalar div_factor = (FFTDir == FFT_FORWARD) ? 
ComplexScalar(1, 0) : ComplexScalar(line_len, 0); + Index offset = getIndexFromOffset(base_offset, dim, j); + buf[offset] = line_buf[j] / div_factor; + } + } + m_device.deallocate(line_buf); + if (!pos_j_base_powered) { + m_device.deallocate(a); + m_device.deallocate(b); + m_device.deallocate(pos_j_base_powered); + } + } + + if(!write_to_out) { + for (int i = 0; i < m_size; ++i) { + data[i] = PartOf()(buf[i]); + } + m_device.deallocate(buf); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(int x) { + eigen_assert(x > 0); + return !(x & (x - 1)); + } + + // The composite number for padding, used in Bluestein's FFT algorithm + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int findGoodComposite(int n) { + int i = 2; + while (i < 2 * n - 1) i *= 2; + return i; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int getLog2(int m) { + int log2m = 0; + while (m >>= 1) log2m++; + return log2m; + } + + // Call Cooley Tukey algorithm directly, data length must be power of 2 + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, int line_len, int log_len) { + eigen_assert(isPowerOfTwo(line_len)); + scramble_FFT(line_buf, line_len); + compute_1D_Butterfly(line_buf, line_len, log_len); + } + + // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, int line_len, int good_composite, int log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { + int n = line_len; + int m = good_composite; + ComplexScalar* data = line_buf; + + for (int i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + a[i] = data[i] * std::conj(pos_j_base_powered[i]); + } + else { + a[i] = data[i] * pos_j_base_powered[i]; + } + } + for (int i = n; i < m; ++i) { + a[i] = ComplexScalar(0, 0); + } + + for (int i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + 
b[i] = pos_j_base_powered[i]; + } + else { + b[i] = std::conj(pos_j_base_powered[i]); + } + } + for (int i = n; i < m - n; ++i) { + b[i] = ComplexScalar(0, 0); + } + for (int i = m - n; i < m; ++i) { + if(FFTDir == FFT_FORWARD) { + b[i] = pos_j_base_powered[m-i]; + } + else { + b[i] = std::conj(pos_j_base_powered[m-i]); + } + } + + scramble_FFT(a, m); + compute_1D_Butterfly(a, m, log_len); + + scramble_FFT(b, m); + compute_1D_Butterfly(b, m, log_len); + + for (int i = 0; i < m; ++i) { + a[i] *= b[i]; + } + + scramble_FFT(a, m); + compute_1D_Butterfly(a, m, log_len); + + //Do the scaling after ifft + for (int i = 0; i < m; ++i) { + a[i] /= m; + } + + for (int i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + data[i] = a[i] * std::conj(pos_j_base_powered[i]); + } + else { + data[i] = a[i] * pos_j_base_powered[i]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, int n) { + eigen_assert(isPowerOfTwo(n)); + int j = 1; + for (int i = 1; i < n; ++i){ + if (j > i) { + std::swap(data[j-1], data[i-1]); + } + int m = n >> 1; + while (m >= 2 && j > m) { + j -= m; + m >>= 1; + } + j += m; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(ComplexScalar* data, int n, int n_power_of_2) { + eigen_assert(isPowerOfTwo(n)); + if (n == 1) { + return; + } + else if (n == 2) { + ComplexScalar tmp = data[1]; + data[1] = data[0] - data[1]; + data[0] += tmp; + return; + } + else if (n == 4) { + ComplexScalar tmp[4]; + tmp[0] = data[0] + data[1]; + tmp[1] = data[0] - data[1]; + tmp[2] = data[2] + data[3]; + if(Dir == FFT_FORWARD) { + tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]); + } + else { + tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]); + } + data[0] = tmp[0] + tmp[2]; + data[1] = tmp[1] + tmp[3]; + data[2] = tmp[0] - tmp[2]; + data[3] = tmp[1] - tmp[3]; + return; + } + else if (n == 8) { + ComplexScalar tmp_1[8]; + ComplexScalar tmp_2[8]; + + tmp_1[0] = data[0] + data[1]; + 
tmp_1[1] = data[0] - data[1]; + tmp_1[2] = data[2] + data[3]; + if (Dir == FFT_FORWARD) { + tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1); + } + else { + tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1); + } + tmp_1[4] = data[4] + data[5]; + tmp_1[5] = data[4] - data[5]; + tmp_1[6] = data[6] + data[7]; + if (Dir == FFT_FORWARD) { + tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1); + } + else { + tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1); + } + tmp_2[0] = tmp_1[0] + tmp_1[2]; + tmp_2[1] = tmp_1[1] + tmp_1[3]; + tmp_2[2] = tmp_1[0] - tmp_1[2]; + tmp_2[3] = tmp_1[1] - tmp_1[3]; + tmp_2[4] = tmp_1[4] + tmp_1[6]; + // SQRT2DIV2 = sqrt(2)/2 + #define SQRT2DIV2 0.7071067811865476 + if (Dir == FFT_FORWARD) { + tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2); + tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1); + tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2); + } + else { + tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2); + tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1); + tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2); + } + data[0] = tmp_2[0] + tmp_2[4]; + data[1] = tmp_2[1] + tmp_2[5]; + data[2] = tmp_2[2] + tmp_2[6]; + data[3] = tmp_2[3] + tmp_2[7]; + data[4] = tmp_2[0] - tmp_2[4]; + data[5] = tmp_2[1] - tmp_2[5]; + data[6] = tmp_2[2] - tmp_2[6]; + data[7] = tmp_2[3] - tmp_2[7]; + + return; + } + else { + compute_1D_Butterfly(data, n/2, n_power_of_2 - 1); + compute_1D_Butterfly(data + n/2, n/2, n_power_of_2 - 1); + //Original code: + //RealScalar wtemp = std::sin(M_PI/n); + //RealScalar wpi = -std::sin(2 * M_PI/n); + RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2]; + RealScalar wpi; + if (Dir == FFT_FORWARD) { + wpi = m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; + } + else { + wpi = 0 - m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; + } + + const ComplexScalar wp(wtemp, wpi); + ComplexScalar w(1.0, 0.0); + for(int i = 0; i < n/2; i++) 
{ + ComplexScalar temp(data[i + n/2] * w); + data[i + n/2] = data[i] - temp; + data[i] += temp; + w += w * wp; + } + return; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const { + Index result = 0; + + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > omitted_dim; --i) { + const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; + const Index idx = index / partial_m_stride; + index -= idx * partial_m_stride; + result += idx * m_strides[i]; + } + result += index; + } + else { + for (int i = 0; i < omitted_dim; ++i) { + const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; + const Index idx = index / partial_m_stride; + index -= idx * partial_m_stride; + result += idx * m_strides[i]; + } + result += index; + } + // Value of index_coords[omitted_dim] is not determined to this step + return result; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const { + Index result = base + offset * m_strides[omitted_dim] ; + return result; + } + + protected: + int m_size; + const FFT& m_fft; + Dimensions m_dimensions; + array m_strides; + TensorEvaluator m_impl; + CoeffReturnType* m_data; + const Device& m_device; + + // This will support a maximum FFT size of 2^32 for each dimension + // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2; + RealScalar m_sin_PI_div_n_LUT[32] = { + 0.0, + -2, + -0.999999999999999, + -0.292893218813453, + -0.0761204674887130, + -0.0192147195967696, + -0.00481527332780311, + -0.00120454379482761, + -3.01181303795779e-04, + -7.52981608554592e-05, + -1.88247173988574e-05, + -4.70619042382852e-06, + -1.17654829809007e-06, + -2.94137117780840e-07, + -7.35342821488550e-08, + -1.83835707061916e-08, + -4.59589268710903e-09, + -1.14897317243732e-09, + -2.87243293150586e-10, + -7.18108232902250e-11, + -1.79527058227174e-11, + 
-4.48817645568941e-12, + -1.12204411392298e-12, + -2.80511028480785e-13, + -7.01277571201985e-14, + -1.75319392800498e-14, + -4.38298482001247e-15, + -1.09574620500312e-15, + -2.73936551250781e-16, + -6.84841378126949e-17, + -1.71210344531737e-17, + -4.28025861329343e-18 + }; + + // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i)); + RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { + 0.0, + 0.0, + -1.00000000000000e+00, + -7.07106781186547e-01, + -3.82683432365090e-01, + -1.95090322016128e-01, + -9.80171403295606e-02, + -4.90676743274180e-02, + -2.45412285229123e-02, + -1.22715382857199e-02, + -6.13588464915448e-03, + -3.06795676296598e-03, + -1.53398018628477e-03, + -7.66990318742704e-04, + -3.83495187571396e-04, + -1.91747597310703e-04, + -9.58737990959773e-05, + -4.79368996030669e-05, + -2.39684498084182e-05, + -1.19842249050697e-05, + -5.99211245264243e-06, + -2.99605622633466e-06, + -1.49802811316901e-06, + -7.49014056584716e-07, + -3.74507028292384e-07, + -1.87253514146195e-07, + -9.36267570730981e-08, + -4.68133785365491e-08, + -2.34066892682746e-08, + -1.17033446341373e-08, + -5.85167231706864e-09, + -2.92583615853432e-09 + }; +}; + +} // end namespace Eigen + +#endif // __CUDACC__ + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H From 2495e2479fb00674a8ad78ea79e10ac2c952f2a7 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 22 Oct 2015 16:52:55 -0700 Subject: [PATCH 016/344] Added tests for the fft code --- unsupported/test/CMakeLists.txt | 2 + unsupported/test/cxx11_tensor_fft.cpp | 273 +++++++++++++++++++++++++ unsupported/test/cxx11_tensor_ifft.cpp | 154 ++++++++++++++ 3 files changed, 429 insertions(+) create mode 100644 unsupported/test/cxx11_tensor_fft.cpp create mode 100644 unsupported/test/cxx11_tensor_ifft.cpp diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 5a9ed5730..3d82508f7 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -144,6 +144,8 @@ 
if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_custom_op "-std=c++0x") ei_add_test(cxx11_tensor_custom_index "-std=c++0x") ei_add_test(cxx11_tensor_sugar "-std=c++0x") + ei_add_test(cxx11_tensor_fft "-std=c++0x") + ei_add_test(cxx11_tensor_ifft "-std=c++0x") # These tests needs nvcc # ei_add_test(cxx11_tensor_device "-std=c++0x") diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp new file mode 100644 index 000000000..4aefcc79c --- /dev/null +++ b/unsupported/test/cxx11_tensor_fft.cpp @@ -0,0 +1,273 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Jianwei Cui +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include + +using Eigen::Tensor; + +template +static void test_fft_2D_golden() { + Tensor input(2, 3); + input(0, 0) = 1; + input(0, 1) = 2; + input(0, 2) = 3; + input(1, 0) = 4; + input(1, 1) = 5; + input(1, 2) = 6; + + array fft; + fft[0] = 0; + fft[1] = 1; + + Tensor, 2, DataLayout, long> output = input.template fft(fft); + + std::complex output_golden[6]; // in ColMajor order + output_golden[0] = std::complex(21, 0); + output_golden[1] = std::complex(-9, 0); + output_golden[2] = std::complex(-3, 1.73205); + output_golden[3] = std::complex( 0, 0); + output_golden[4] = std::complex(-3, -1.73205); + output_golden[5] = std::complex(0 ,0); + + std::complex c_offset = std::complex(1.0, 1.0); + + if (DataLayout == ColMajor) { + VERIFY_IS_APPROX(output(0) + c_offset, output_golden[0] + c_offset); + VERIFY_IS_APPROX(output(1) + c_offset, output_golden[1] + c_offset); + VERIFY_IS_APPROX(output(2) + c_offset, output_golden[2] + c_offset); + VERIFY_IS_APPROX(output(3) + c_offset, output_golden[3] + c_offset); + VERIFY_IS_APPROX(output(4) + c_offset, output_golden[4] + c_offset); + 
VERIFY_IS_APPROX(output(5) + c_offset, output_golden[5] + c_offset); + } + else { + VERIFY_IS_APPROX(output(0)+ c_offset, output_golden[0]+ c_offset); + VERIFY_IS_APPROX(output(1)+ c_offset, output_golden[2]+ c_offset); + VERIFY_IS_APPROX(output(2)+ c_offset, output_golden[4]+ c_offset); + VERIFY_IS_APPROX(output(3)+ c_offset, output_golden[1]+ c_offset); + VERIFY_IS_APPROX(output(4)+ c_offset, output_golden[3]+ c_offset); + VERIFY_IS_APPROX(output(5)+ c_offset, output_golden[5]+ c_offset); + } +} + +static void test_fft_complex_input_golden() { + Tensor, 1, ColMajor, long> input(5); + input(0) = std::complex(1, 1); + input(1) = std::complex(2, 2); + input(2) = std::complex(3, 3); + input(3) = std::complex(4, 4); + input(4) = std::complex(5, 5); + + array fft; + fft[0] = 0; + + Tensor, 1, ColMajor, long> forward_output_both_parts = input.template fft(fft); + Tensor, 1, ColMajor, long> reverse_output_both_parts = input.template fft(fft); + + Tensor forward_output_real_part = input.template fft(fft); + Tensor reverse_output_real_part = input.template fft(fft); + + Tensor forward_output_imag_part = input.template fft(fft); + Tensor reverse_output_imag_part = input.template fft(fft); + + VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0)); + + VERIFY_IS_EQUAL(forward_output_real_part.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_real_part.dimension(0), input.dimension(0)); + + VERIFY_IS_EQUAL(forward_output_imag_part.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_imag_part.dimension(0), input.dimension(0)); + + std::complex forward_golden_result[5]; + std::complex reverse_golden_result[5]; + + forward_golden_result[0] = std::complex(15.000000000000000,+15.000000000000000); + forward_golden_result[1] = std::complex(-5.940954801177935, +0.940954801177934); + forward_golden_result[2] = std::complex(-3.312299240582266, 
-1.687700759417735); + forward_golden_result[3] = std::complex(-1.687700759417735, -3.312299240582266); + forward_golden_result[4] = std::complex( 0.940954801177934, -5.940954801177935); + + reverse_golden_result[0] = std::complex( 3.000000000000000, + 3.000000000000000); + reverse_golden_result[1] = std::complex( 0.188190960235587, - 1.188190960235587); + reverse_golden_result[2] = std::complex(-0.337540151883547, - 0.662459848116453); + reverse_golden_result[3] = std::complex(-0.662459848116453, - 0.337540151883547); + reverse_golden_result[4] = std::complex(-1.188190960235587, + 0.188190960235587); + + for(int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(forward_output_both_parts(i), forward_golden_result[i]); + VERIFY_IS_APPROX(forward_output_real_part(i), forward_golden_result[i].real()); + VERIFY_IS_APPROX(forward_output_imag_part(i), forward_golden_result[i].imag()); + } + + for(int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(reverse_output_both_parts(i), reverse_golden_result[i]); + VERIFY_IS_APPROX(reverse_output_real_part(i), reverse_golden_result[i].real()); + VERIFY_IS_APPROX(reverse_output_imag_part(i), reverse_golden_result[i].imag()); + } +} + +static void test_fft_real_input_golden() { + Tensor input(5); + input(0) = 1.0; + input(1) = 2.0; + input(2) = 3.0; + input(3) = 4.0; + input(4) = 5.0; + + array fft; + fft[0] = 0; + + Tensor, 1, ColMajor, long> forward_output_both_parts = input.template fft(fft); + Tensor, 1, ColMajor, long> reverse_output_both_parts = input.template fft(fft); + + Tensor forward_output_real_part = input.template fft(fft); + Tensor reverse_output_real_part = input.template fft(fft); + + Tensor forward_output_imag_part = input.template fft(fft); + Tensor reverse_output_imag_part = input.template fft(fft); + + VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0)); + + VERIFY_IS_EQUAL(forward_output_real_part.dimension(0), 
input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_real_part.dimension(0), input.dimension(0)); + + VERIFY_IS_EQUAL(forward_output_imag_part.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_imag_part.dimension(0), input.dimension(0)); + + std::complex forward_golden_result[5]; + std::complex reverse_golden_result[5]; + + + forward_golden_result[0] = std::complex( 15, 0); + forward_golden_result[1] = std::complex(-2.5, +3.44095480117793); + forward_golden_result[2] = std::complex(-2.5, +0.81229924058227); + forward_golden_result[3] = std::complex(-2.5, -0.81229924058227); + forward_golden_result[4] = std::complex(-2.5, -3.44095480117793); + + reverse_golden_result[0] = std::complex( 3.0, 0); + reverse_golden_result[1] = std::complex(-0.5, -0.688190960235587); + reverse_golden_result[2] = std::complex(-0.5, -0.162459848116453); + reverse_golden_result[3] = std::complex(-0.5, +0.162459848116453); + reverse_golden_result[4] = std::complex(-0.5, +0.688190960235587); + + std::complex c_offset(1.0, 1.0); + float r_offset = 1.0; + + for(int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(forward_output_both_parts(i) + c_offset, forward_golden_result[i] + c_offset); + VERIFY_IS_APPROX(forward_output_real_part(i) + r_offset, forward_golden_result[i].real() + r_offset); + VERIFY_IS_APPROX(forward_output_imag_part(i) + r_offset, forward_golden_result[i].imag() + r_offset); + } + + for(int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(reverse_output_both_parts(i) + c_offset, reverse_golden_result[i] + c_offset); + VERIFY_IS_APPROX(reverse_output_real_part(i) + r_offset, reverse_golden_result[i].real() + r_offset); + VERIFY_IS_APPROX(reverse_output_imag_part(i) + r_offset, reverse_golden_result[i].imag() + r_offset); + } +} + + +template +static void test_fft_real_input_energy() { + + Eigen::DSizes dimensions; + int total_size = 1; + for (int i = 0; i < TensorRank; ++i) { + dimensions[i] = rand() % 20 + 1; + total_size *= dimensions[i]; + } + const DSizes arr = 
dimensions; + + typedef typename internal::conditional, RealScalar>::type InputScalar; + + Tensor input; + input.resize(arr); + input.setRandom(); + + array fft; + for (int i = 0; i < TensorRank; ++i) { + fft[i] = i; + } + + typedef typename internal::conditional, RealScalar>::type OutputScalar; + Tensor output; + output = input.template fft(fft); + + for (int i = 0; i < TensorRank; ++i) { + VERIFY_IS_EQUAL(output.dimension(i), input.dimension(i)); + } + + float energy_original = 0.0; + float energy_after_fft = 0.0; + + for (int i = 0; i < total_size; ++i) { + energy_original += pow(std::abs(input(i)), 2); + } + + for (int i = 0; i < total_size; ++i) { + energy_after_fft += pow(std::abs(output(i)), 2); + } + + if(FFTDirection == FFT_FORWARD) { + VERIFY_IS_APPROX(energy_original, energy_after_fft / total_size); + } + else { + VERIFY_IS_APPROX(energy_original, energy_after_fft * total_size); + } +} + +void test_cxx11_tensor_fft() { + test_fft_complex_input_golden(); + test_fft_real_input_golden(); + + test_fft_2D_golden(); + test_fft_2D_golden(); + + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + + 
test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); + test_fft_real_input_energy(); +} diff --git a/unsupported/test/cxx11_tensor_ifft.cpp b/unsupported/test/cxx11_tensor_ifft.cpp new file mode 100644 index 000000000..5fd88fa6c --- /dev/null +++ b/unsupported/test/cxx11_tensor_ifft.cpp @@ -0,0 +1,154 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Jianwei Cui +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include +#include + +using Eigen::Tensor; + +template +static void test_1D_fft_ifft_invariant(int sequence_length) { + Tensor tensor(sequence_length); + tensor.setRandom(); + + array fft; + fft[0] = 0; + + Tensor, 1, DataLayout> tensor_after_fft; + Tensor, 1, DataLayout> tensor_after_fft_ifft; + + tensor_after_fft = tensor.template fft(fft); + tensor_after_fft_ifft = tensor_after_fft.template fft(fft); + + VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), sequence_length); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), sequence_length); + + for (int i = 0; i < sequence_length; ++i) { + VERIFY_IS_APPROX(static_cast(tensor(i)), static_cast(std::real(tensor_after_fft_ifft(i)))); + } +} + +template +static void test_2D_fft_ifft_invariant(int dim0, int dim1) { + Tensor tensor(dim0, dim1); + tensor.setRandom(); + + array fft; + fft[0] = 0; + fft[1] = 1; + + Tensor, 2, DataLayout> tensor_after_fft; + Tensor, 2, DataLayout> tensor_after_fft_ifft; + + tensor_after_fft = tensor.template fft(fft); + tensor_after_fft_ifft = tensor_after_fft.template fft(fft); + + VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0); + 
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1); + + for (int i = 0; i < dim0; ++i) { + for (int j = 0; j < dim1; ++j) { + //std::cout << "[" << i << "][" << j << "]" << " Original data: " << tensor(i,j) << " Transformed data:" << tensor_after_fft_ifft(i,j) << std::endl; + VERIFY_IS_APPROX(static_cast(tensor(i,j)), static_cast(std::real(tensor_after_fft_ifft(i,j)))); + } + } +} + +template +static void test_3D_fft_ifft_invariant(int dim0, int dim1, int dim2) { + Tensor tensor(dim0, dim1, dim2); + tensor.setRandom(); + + array fft; + fft[0] = 0; + fft[1] = 1; + fft[2] = 2; + + Tensor, 3, DataLayout> tensor_after_fft; + Tensor, 3, DataLayout> tensor_after_fft_ifft; + + tensor_after_fft = tensor.template fft(fft); + tensor_after_fft_ifft = tensor_after_fft.template fft(fft); + + VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(2), dim2); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(2), dim2); + + for (int i = 0; i < dim0; ++i) { + for (int j = 0; j < dim1; ++j) { + for (int k = 0; k < dim2; ++k) { + VERIFY_IS_APPROX(static_cast(tensor(i,j,k)), static_cast(std::real(tensor_after_fft_ifft(i,j,k)))); + } + } + } +} + +template +static void test_sub_fft_ifft_invariant(int dim0, int dim1, int dim2, int dim3) { + Tensor tensor(dim0, dim1, dim2, dim3); + tensor.setRandom(); + + array fft; + fft[0] = 2; + fft[1] = 0; + + Tensor, 4, DataLayout> tensor_after_fft; + Tensor tensor_after_fft_ifft; + + tensor_after_fft = tensor.template fft(fft); + tensor_after_fft_ifft = tensor_after_fft.template fft(fft); + + VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(2), dim2); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(3), dim3); + 
VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(2), dim2); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(3), dim3); + + for (int i = 0; i < dim0; ++i) { + for (int j = 0; j < dim1; ++j) { + for (int k = 0; k < dim2; ++k) { + for (int l = 0; l < dim3; ++l) { + VERIFY_IS_APPROX(static_cast(tensor(i,j,k,l)), static_cast(tensor_after_fft_ifft(i,j,k,l))); + } + } + } + } +} + +void test_cxx11_tensor_ifft() { + CALL_SUBTEST(test_1D_fft_ifft_invariant(4)); + CALL_SUBTEST(test_1D_fft_ifft_invariant(16)); + CALL_SUBTEST(test_1D_fft_ifft_invariant(32)); + CALL_SUBTEST(test_1D_fft_ifft_invariant(1024*1024)); + + CALL_SUBTEST(test_2D_fft_ifft_invariant(4,4)); + CALL_SUBTEST(test_2D_fft_ifft_invariant(8,16)); + CALL_SUBTEST(test_2D_fft_ifft_invariant(16,32)); + CALL_SUBTEST(test_2D_fft_ifft_invariant(1024,1024)); + + CALL_SUBTEST(test_3D_fft_ifft_invariant(4,4,4)); + CALL_SUBTEST(test_3D_fft_ifft_invariant(8,16,32)); + CALL_SUBTEST(test_3D_fft_ifft_invariant(16,4,8)); + CALL_SUBTEST(test_3D_fft_ifft_invariant(256,256,256)); + + CALL_SUBTEST(test_sub_fft_ifft_invariant(4,4,4,4)); + CALL_SUBTEST(test_sub_fft_ifft_invariant(8,16,32,64)); + CALL_SUBTEST(test_sub_fft_ifft_invariant(16,4,8,12)); + CALL_SUBTEST(test_sub_fft_ifft_invariant(64,64,64,64)); +} From 2dd944661380875b5536658185041acbd459a225 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 22 Oct 2015 16:53:36 -0700 Subject: [PATCH 017/344] Added mapping between a specific device and the corresponding packet type --- .../Eigen/CXX11/src/Tensor/TensorMeta.h | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 07735fa5f..3952e733c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -32,6 +32,29 @@ template <> 
struct max_n_1<0> { }; +// Default packet types +template +struct PacketType { + typedef typename internal::packet_traits::type type; + static const int size = internal::unpacket_traits::size; +}; + +// For CUDA packet types when using a GpuDevice +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +template <> +struct PacketType { + typedef float4 type; + static const int size = 4; +}; +template <> +struct PacketType { + typedef double2 type; + static const int size = 2; +}; +#endif + + + // Tuple mimics std::pair but works on e.g. nvcc. template struct Tuple { public: From ac99b4924976cb2d06a1747cd86e792de60f16c3 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 22 Oct 2015 16:54:21 -0700 Subject: [PATCH 018/344] Added missing glue logic --- unsupported/Eigen/CXX11/Tensor | 1 + unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 7 +++++++ .../CXX11/src/Tensor/TensorForwardDeclarations.h | 13 +++++++++++++ 3 files changed, 21 insertions(+) diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 3331ccb55..282ea00bb 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -81,6 +81,7 @@ #include "src/Tensor/TensorContractionCuda.h" #include "src/Tensor/TensorConversion.h" #include "src/Tensor/TensorConvolution.h" +#include "src/Tensor/TensorFFT.h" #include "src/Tensor/TensorPatch.h" #include "src/Tensor/TensorImagePatch.h" #include "src/Tensor/TensorVolumePatch.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 1b85f5ef5..b004fdd7d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -334,6 +334,13 @@ class TensorBase return TensorConvolutionOp(derived(), kernel.derived(), dims); } + // Fourier transforms + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorFFTOp + fft(const FFT& fft) const { + return TensorFFTOp(derived(), fft); + } + // Reductions. 
template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReductionOp, const Dims, const Derived> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index c22444e6f..fbeb9c59a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -29,6 +29,7 @@ template class Tenso template class TensorContractionOp; template class TensorConversionOp; template class TensorConvolutionOp; +template class TensorFFTOp; template class TensorPatchOp; template class TensorImagePatchOp; template class TensorVolumePatchOp; @@ -58,6 +59,18 @@ struct DefaultDevice; struct ThreadPoolDevice; struct GpuDevice; +enum FFTResultType { + RealPart = 0, + ImagPart = 1, + BothParts = 2 +}; + +enum FFTDirection { + FFT_FORWARD = 0, + FFT_REVERSE = 1 +}; + + namespace internal { template From 54b23cce1661b17e166f07d16169fde76d8d7aa0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 23 Oct 2015 10:36:33 +0200 Subject: [PATCH 019/344] Switch to MPL2 --- test/metis_support.cpp | 22 ++++------------------ test/sparselu.cpp | 22 +++------------------- 2 files changed, 7 insertions(+), 37 deletions(-) diff --git a/test/metis_support.cpp b/test/metis_support.cpp index 932b04074..d87c56a13 100644 --- a/test/metis_support.cpp +++ b/test/metis_support.cpp @@ -3,24 +3,10 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. 
-// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #include "sparse_solver.h" #include #include diff --git a/test/sparselu.cpp b/test/sparselu.cpp index 78615ff3b..bd000baf1 100644 --- a/test/sparselu.cpp +++ b/test/sparselu.cpp @@ -3,25 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . - +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // SparseLU solve does not accept column major matrices for the destination. // However, as expected, the generic check_sparse_square_solving routines produces row-major From 0905ed539065adbe41e063313712518403458509 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 23 Oct 2015 14:41:25 +0200 Subject: [PATCH 020/344] remove useless cstdint header --- unsupported/test/mpreal/mpreal.h | 1 - 1 file changed, 1 deletion(-) diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h index 7d14c0961..c4f6cf0cb 100644 --- a/unsupported/test/mpreal/mpreal.h +++ b/unsupported/test/mpreal/mpreal.h @@ -56,7 +56,6 @@ #include #include #include -#include #include #include From c244081490d86f0156cee4f0d05873897afcbf21 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 23 Oct 2015 14:48:54 +0200 Subject: [PATCH 021/344] disable usage of INTMAX_T --- unsupported/test/mpreal/mpreal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h index c4f6cf0cb..9b96ec411 100644 --- a/unsupported/test/mpreal/mpreal.h +++ b/unsupported/test/mpreal/mpreal.h @@ -107,7 +107,7 @@ #define MPREAL_HAVE_EXPLICIT_CONVERTERS #endif -#define MPFR_USE_INTMAX_T // Enable 64-bit integer types - should be defined before mpfr.h +//#define MPFR_USE_INTMAX_T // Enable 64-bit integer types - should be defined before mpfr.h #if defined(MPREAL_HAVE_MSVC_DEBUGVIEW) && defined(_MSC_VER) && defined(_DEBUG) #define MPREAL_MSVC_DEBUGVIEW_CODE DebugView = toString(); From 9ea39ce13c453127844cff474730af119e889cd1 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 23 Oct 2015 09:15:34 -0700 Subject: [PATCH 022/344] Refined the #ifdef __CUDACC__ guard to ensure that when trying to compile gpu code with a non cuda compiler results in a linking error instead of bogus code. 
--- .../Eigen/CXX11/src/Tensor/TensorDevice.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorDeviceType.h | 11 ++- .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 78 +++++++++++-------- 3 files changed, 54 insertions(+), 37 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index 17f10c07b..7b2485fb7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -106,7 +106,7 @@ template class TensorDevice class TensorDevice { public: diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h index 2ff7d471d..300ee8ac0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h @@ -287,6 +287,7 @@ class StreamInterface { virtual void deallocate(void* buffer) const = 0; }; +#if defined(__CUDACC__) static cudaDeviceProp* m_deviceProperties; static bool m_devicePropInitialized = false; @@ -362,7 +363,7 @@ class CudaStreamDevice : public StreamInterface { const cudaStream_t* stream_; int device_; }; - +#endif // __CUDACC__ struct GpuDevice { // The StreamInterface is not owned: the caller is @@ -450,7 +451,7 @@ struct GpuDevice { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const { -#ifndef __CUDA_ARCH__ +#if defined(__CUDACC__) && !defined(__CUDA_ARCH__) cudaError_t err = cudaStreamSynchronize(stream_->stream()); assert(err == cudaSuccess); #else @@ -477,8 +478,12 @@ struct GpuDevice { // This function checks if the CUDA runtime recorded an error for the // underlying stream device. inline bool ok() const { +#ifdef __CUDACC__ cudaError_t error = cudaStreamQuery(stream_->stream()); return (error == cudaSuccess) || (error == cudaErrorNotReady); +#else + return false; +#endif } private: @@ -493,10 +498,12 @@ struct GpuDevice { // FIXME: Should be device and kernel specific. 
+#ifdef __CUDACC__ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { cudaError_t status = cudaDeviceSetSharedMemConfig(config); assert(status == cudaSuccess); } +#endif #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index b2800aefb..95fc9fec6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -149,7 +149,24 @@ class TensorExecutor // GPU: the evaluation of the expression is offloaded to a GPU. -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +#if defined(EIGEN_USE_GPU) + +template +class TensorExecutor { + public: + typedef typename Expression::Index Index; + static void run(const Expression& expr, const GpuDevice& device); +}; + +template +class TensorExecutor { + public: + typedef typename Expression::Index Index; + static void run(const Expression& expr, const GpuDevice& device); +}; + +#if defined(__CUDACC__) + template __global__ void __launch_bounds__(1024) @@ -193,48 +210,41 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) { } } - -template -class TensorExecutor +/*static*/ +template +inline void TensorExecutor::run(const Expression& expr, const GpuDevice& device) { - public: - typedef typename Expression::Index Index; - static inline void run(const Expression& expr, const GpuDevice& device) + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { - TensorEvaluator evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) - { - const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); - const int block_size = device.maxCudaThreadsPerBlock(); - const Index size = array_prod(evaluator.dimensions()); - LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable, Index>), 
num_blocks, block_size, 0, device, evaluator, size); - } - evaluator.cleanup(); + const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); + const int block_size = device.maxCudaThreadsPerBlock(); + const Index size = array_prod(evaluator.dimensions()); + LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); } -}; + evaluator.cleanup(); +} + +/*static*/ template -class TensorExecutor +inline void TensorExecutor::run(const Expression& expr, const GpuDevice& device) { - public: - typedef typename Expression::Index Index; - static inline void run(const Expression& expr, const GpuDevice& device) + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { - TensorEvaluator evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) - { - const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); - const int block_size = device.maxCudaThreadsPerBlock(); - const Index size = array_prod(evaluator.dimensions()); - LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); - } - evaluator.cleanup(); + const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); + const int block_size = device.maxCudaThreadsPerBlock(); + const Index size = array_prod(evaluator.dimensions()); + LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); } -}; + evaluator.cleanup(); +} -#endif +#endif // __CUDACC__ +#endif // EIGEN_USE_GPU } // end namespace internal From a586fdaa913554db3136e6ff984f2a1c091c541c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 23 Oct 2015 
09:33:41 -0700 Subject: [PATCH 023/344] Reworked the tensor contraction mapper code to make it compile on Android --- .../CXX11/src/Tensor/TensorContraction.h | 249 +++++++++--------- 1 file changed, 128 insertions(+), 121 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index e60fab713..e8447e84c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -33,14 +33,14 @@ template -class BaseTensorContractionMapper { +class SimpleTensorContractionMapper { public: EIGEN_DEVICE_FUNC - BaseTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) : + SimpleTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : m_tensor(tensor), m_nocontract_strides(nocontract_strides), m_ij_strides(ij_strides), @@ -160,104 +160,23 @@ class BaseTensorContractionMapper { }; - template -class TensorContractionInputMapper; - -template -class TensorContractionSubMapper { + size_t packet_size, bool inner_dim_contiguous, + bool inner_dim_reordered, int Alignment> + class BaseTensorContractionMapper : public SimpleTensorContractionMapper +{ public: - typedef typename packet_traits::type Packet; - typedef typename packet_traits::half HalfPacket; - - typedef TensorContractionInputMapper ParentMapper; - typedef TensorContractionSubMapper Self; - typedef Self LinearMapper; - - EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) - : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { - return 
m_base_mapper(i + m_vert_offset, m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { - return m_base_mapper(i + m_vert_offset, j + m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { - return m_base_mapper.loadPacket(i + m_vert_offset, m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { - return m_base_mapper.loadPacket(i + m_vert_offset, j + m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { - return m_base_mapper.loadHalfPacket(i + m_vert_offset, m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { - m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { - return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); - } - - template - EIGEN_ALWAYS_INLINE PacketT load(Index i) const { - EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((AlignmentType == Aligned || Alignment == Unaligned), YOU_MADE_A_PROGRAMMING_MISTAKE); - return loadPacket(i); - } - - template - bool aligned(Index /*i*/) const { - return false; - } - - private: - const ParentMapper& m_base_mapper; - const Index m_vert_offset; - const Index m_horiz_offset; -}; - - -template::size : 1), - bool inner_dim_contiguous = false, bool inner_dim_reordered = (side != Lhs), int Alignment=Unaligned> -class TensorContractionInputMapper - : public BaseTensorContractionMapper { - - public: - typedef BaseTensorContractionMapper Base; - typedef TensorContractionSubMapper SubMapper; - typedef SubMapper VectorMapper; - - TensorContractionInputMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - 
const contract_t& k_strides) - : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + typedef SimpleTensorContractionMapper ParentMapper; EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { - return SubMapper(*this, i, j); - } - - EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { - return VectorMapper(*this, i, j); - } + BaseTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } typedef typename packet_traits::type Packet; typedef typename packet_traits::half HalfPacket; @@ -322,35 +241,23 @@ class TensorContractionInputMapper }; - - template -class TensorContractionInputMapper - : public BaseTensorContractionMapper { - + bool inner_dim_contiguous, + bool inner_dim_reordered, int Alignment> +class BaseTensorContractionMapper : public SimpleTensorContractionMapper +{ public: - typedef BaseTensorContractionMapper Base; - typedef TensorContractionSubMapper SubMapper; - typedef SubMapper VectorMapper; - - TensorContractionInputMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) - : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + typedef SimpleTensorContractionMapper ParentMapper; EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { - return SubMapper(*this, i, j); - } - - EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { - return VectorMapper(*this, i, j); - } + BaseTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& 
k_strides) : + ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } typedef typename packet_traits::type Packet; EIGEN_DEVICE_FUNC @@ -365,6 +272,106 @@ class TensorContractionInputMapper +class TensorContractionInputMapper; + +template +class TensorContractionSubMapper { + public: + typedef typename packet_traits::type Packet; + typedef typename packet_traits::half HalfPacket; + + typedef TensorContractionInputMapper ParentMapper; + typedef TensorContractionSubMapper Self; + typedef Self LinearMapper; + + EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + return m_base_mapper(i + m_vert_offset, m_horiz_offset); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { + return m_base_mapper(i + m_vert_offset, j + m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + return m_base_mapper.loadPacket(i + m_vert_offset, m_horiz_offset); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { + return m_base_mapper.loadPacket(i + m_vert_offset, j + m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { + return m_base_mapper.loadHalfPacket(i + m_vert_offset, m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { + m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { + EIGEN_STATIC_ASSERT((internal::is_same::value), 
YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((AlignmentType == Aligned || Alignment == Unaligned), YOU_MADE_A_PROGRAMMING_MISTAKE); + return loadPacket(i); + } + + template + EIGEN_DEVICE_FUNC bool aligned(Index) const { + return false; + } + + private: + const ParentMapper& m_base_mapper; + const Index m_vert_offset; + const Index m_horiz_offset; +}; + + +template +class TensorContractionInputMapper + : public BaseTensorContractionMapper { + + public: + typedef BaseTensorContractionMapper Base; + typedef TensorContractionSubMapper SubMapper; + typedef SubMapper VectorMapper; + + EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) + : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { + return SubMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { + return VectorMapper(*this, i, j); + } +}; + + template struct traits > From c40c2ceb27d8a9dabadcbf46a4e3161f3189992f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 23 Oct 2015 09:38:19 -0700 Subject: [PATCH 024/344] Reordered the code of fft constructor to prevent compilation warnings --- unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index d9b316de1..dbc8c2ca9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -137,7 +137,7 @@ struct TensorEvaluator, D CoordAccess = false, }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_data(NULL), m_impl(op.expression(), device), 
m_fft(op.fft()), m_device(device) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) { const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); for (int i = 0; i < NumDims; ++i) { eigen_assert(input_dims[i] > 0); From 57857775b461b020c16f2bbeb130d6b1863d951c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 23 Oct 2015 10:20:51 -0700 Subject: [PATCH 025/344] Added support for arrays of size 0 --- .../Eigen/CXX11/src/Core/util/CXX11Meta.h | 32 ++++++++++++------- .../CXX11/src/Core/util/EmulateCXX11Meta.h | 31 +++++++++++++++++- 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h index 3a08628be..c44496865 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h @@ -112,7 +112,7 @@ template struct get<0, type_lis template struct get> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); }; template struct get> : get> {}; -template struct get<0, numeric_list> { constexpr static int value = a; }; +template struct get<0, numeric_list> { constexpr static T value = a; }; template struct get> { static_assert((n - n) < 0, "meta-template get: The element to extract from a list must be smaller than the size of the list."); }; /* always get type, regardless of dummy; good for parameter pack expansion */ @@ -326,6 +326,7 @@ constexpr inline std::array array_reverse(std::array arr) return h_array_reverse(arr, typename gen_numeric_list::type()); } + /* generic array reductions */ // can't reuse standard reduce() interface above because Intel's Compiler @@ -335,39 +336,48 @@ constexpr inline std::array array_reverse(std::array arr) // an infinite loop) template struct 
h_array_reduce { - constexpr static inline auto run(std::array arr) -> decltype(Reducer::run(h_array_reduce::run(arr), array_get(arr))) + constexpr static inline auto run(std::array arr, T identity) -> decltype(Reducer::run(h_array_reduce::run(arr, identity), array_get(arr))) { - return Reducer::run(h_array_reduce::run(arr), array_get(arr)); + return Reducer::run(h_array_reduce::run(arr, identity), array_get(arr)); } }; template struct h_array_reduce { - constexpr static inline T run(std::array arr) + constexpr static inline T run(const std::array& arr, T identity) { return array_get<0>(arr); } }; -template -constexpr inline auto array_reduce(std::array arr) -> decltype(h_array_reduce::run(arr)) +template +struct h_array_reduce { - return h_array_reduce::run(arr); + constexpr static inline T run(const std::array& arr, T identity) + { + return identity; + } +}; + +template +constexpr inline auto array_reduce(const std::array& arr, T identity) -> decltype(h_array_reduce::run(arr, identity)) +{ + return h_array_reduce::run(arr, identity); } /* standard array reductions */ template -constexpr inline auto array_sum(std::array arr) -> decltype(array_reduce(arr)) +constexpr inline auto array_sum(const std::array& arr) -> decltype(array_reduce(arr, static_cast(0))) { - return array_reduce(arr); + return array_reduce(arr, static_cast(0)); } template -constexpr inline auto array_prod(std::array arr) -> decltype(array_reduce(arr)) +constexpr inline auto array_prod(const std::array& arr) -> decltype(array_reduce(arr, static_cast(1))) { - return array_reduce(arr); + return array_reduce(arr, static_cast(1)); } template diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h index 0ae638fb9..ecd1bddf1 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h @@ -113,6 +113,35 @@ template class array { }; +// Specialize array for zero 
size +template class array { + public: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& operator[] (size_t) { + eigen_assert(false && "Can't index a zero size array"); + return *static_cast(NULL); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& operator[] (size_t) const { + eigen_assert(false && "Can't index a zero size array"); + return *static_cast(NULL); + } + + static EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array() { } + +#ifdef EIGEN_HAS_VARIADIC_TEMPLATES + array(std::initializer_list l) { + eigen_assert(l.size() == 0); + } +#endif +}; + + + namespace internal { /** \internal @@ -279,7 +308,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod( return arg_prod::value; } -template +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array& a) { t prod = 1; for (size_t i = 0; i < n; ++i) { prod *= a[i]; } From 56983f6d433a1c3d4fa882a402baeafbef064071 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 23 Oct 2015 12:03:42 -0700 Subject: [PATCH 026/344] Fixed compilation warning --- unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h index c44496865..96a7d5c20 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h @@ -345,7 +345,7 @@ struct h_array_reduce { template struct h_array_reduce { - constexpr static inline T run(const std::array& arr, T identity) + constexpr static inline T run(const std::array& arr, T) { return array_get<0>(arr); } From 85afb614177b4721235fafa139a77f9888a090ec Mon Sep 17 00:00:00 2001 From: Sergiu Dotenco Date: Fri, 28 Aug 2015 22:20:15 +0200 Subject: [PATCH 027/344] use explicit Scalar types for AngleAxis initialization (grafted from 89a222ce502483f8f0b02db9261445b7dff69760 ) --- 
Eigen/src/Geometry/AngleAxis.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Geometry/AngleAxis.h b/Eigen/src/Geometry/AngleAxis.h index 636712c2b..a5484ba77 100644 --- a/Eigen/src/Geometry/AngleAxis.h +++ b/Eigen/src/Geometry/AngleAxis.h @@ -133,7 +133,7 @@ public: m_angle = Scalar(other.angle()); } - static inline const AngleAxis Identity() { return AngleAxis(0, Vector3::UnitX()); } + static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); } /** \returns \c true if \c *this is approximately equal to \a other, within the precision * determined by \a prec. @@ -170,8 +170,8 @@ AngleAxis& AngleAxis::operator=(const QuaternionBase Date: Mon, 28 Sep 2015 10:08:26 +0200 Subject: [PATCH 028/344] bug #1069: fix AVX support on MSVC (use of non portable C-style cast) --- Eigen/src/Core/arch/AVX/MathFunctions.h | 40 ++++++++++++------------- Eigen/src/Core/arch/AVX/PacketMath.h | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 06cd56684..c4bd6bd53 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -38,10 +38,10 @@ psin(const Packet8f& _x) { _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f); _EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f); _EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f); - _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00); - _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04); - _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07); - _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00); + _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f); + _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f); + _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f); + _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 
1.273239544735163e+00f); // Map x from [-Pi/4,3*Pi/4] to z in [-1,3] and subtract the shifted period. Packet8f z = pmul(x, p8f_one_over_pi); @@ -55,14 +55,14 @@ psin(const Packet8f& _x) { // is odd. Packet8i shift_ints = _mm256_cvtps_epi32(shift); Packet8i shift_isodd = - (__m256i)_mm256_and_ps((__m256)shift_ints, (__m256)p8i_one); + _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one))); #ifdef EIGEN_VECTORIZE_AVX2 Packet8i sign_flip_mask = _mm256_slli_epi32(shift_isodd, 31); #else __m128i lo = - _mm_slli_epi32(_mm256_extractf128_si256((__m256i)shift_isodd, 0), 31); + _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 0), 31); __m128i hi = - _mm_slli_epi32(_mm256_extractf128_si256((__m256i)shift_isodd, 1), 31); + _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 1), 31); Packet8i sign_flip_mask = _mm256_setr_m128(lo, hi); #endif @@ -72,9 +72,9 @@ psin(const Packet8f& _x) { // Evaluate the polynomial for the interval [1,3] in z. _EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f); - _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01); - _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02); - _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04); + _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f); + _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f); + _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f); Packet8f z_minus_two = psub(z, p8f_two); Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two); Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4); @@ -82,10 +82,10 @@ psin(const Packet8f& _x) { right = pmadd(right, z_minus_two2, p8f_coeff_right_0); // Evaluate the polynomial for the interval [-1,1] in z. 
- _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01); - _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02); - _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03); - _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05); + _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f); + _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f); + _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f); + _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f); Packet8f z2 = pmul(z, z); Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5); left = pmadd(left, z2, p8f_coeff_left_3); @@ -98,7 +98,7 @@ psin(const Packet8f& _x) { Packet8f res = _mm256_or_ps(left, right); // Flip the sign on the odd intervals and return the result. - res = _mm256_xor_ps(res, (__m256)sign_flip_mask); + res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask)); return res; } @@ -145,10 +145,10 @@ plog(const Packet8f& _x) { // Extract the shifted exponents (No bitwise shifting in regular AVX, so // convert to SSE and do it there). #ifdef EIGEN_VECTORIZE_AVX2 - Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32((__m256i)x, 23)); + Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(x), 23)); #else - __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256((__m256i)x, 0), 23); - __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256((__m256i)x, 1), 23); + __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 0), 23); + __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 1), 23); Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_setr_m128(lo, hi)); #endif Packet8f e = _mm256_sub_ps(emm0, p8f_126f); @@ -348,7 +348,7 @@ pexp(const Packet4d& _x) { // Construct the result 2^n * exp(g) = e * x. The max is used to catch // non-finite values in the input. 
- return pmax(pmul(x, Packet4d(e)), _x); + return pmax(pmul(x, _mm256_castsi256_pd(e)), _x); } // Functions for sqrt. @@ -393,7 +393,7 @@ Packet4d psqrt(const Packet4d& x) { template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f prsqrt(const Packet8f& _x) { - _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000); + _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000); _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000); _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f); _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f); diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 32c121ab6..dfdb71abd 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -43,7 +43,7 @@ template<> struct is_arithmetic<__m256d> { enum { value = true }; }; const Packet4d p4d_##NAME = pset1(X) #define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \ - const Packet8f p8f_##NAME = (__m256)pset1(X) + const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1(X)) #define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \ const Packet8i p8i_##NAME = pset1(X) From 8c1ee3629f845572caaba28c746bab0ef6a0084a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 28 Sep 2015 11:36:00 +0200 Subject: [PATCH 029/344] Add support for row/col-wise lpNorm() --- Eigen/src/Core/VectorwiseOp.h | 27 +++++++++++++++++++++++++++ test/vectorwiseop.cpp | 5 +++++ 2 files changed, 32 insertions(+) diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 37171aaa0..79c7d135d 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -124,6 +124,16 @@ EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits::AddCost); EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits::MulCost); +template +struct member_lpnorm { + typedef ResultType result_type; + template struct Cost + { enum { value = (Size+5) * NumTraits::MulCost + 
(Size-1)*NumTraits::AddCost }; }; + EIGEN_DEVICE_FUNC explicit member_lpnorm() {} + template + EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const + { return mat.template lpNorm

(); } +}; template struct member_redux { @@ -290,6 +300,10 @@ template class VectorwiseOp typedef typename ReturnType::Type ProdReturnType; typedef Reverse ReverseReturnType; + template struct LpNormReturnType { + typedef PartialReduxExpr,Direction> Type; + }; + /** \returns a row (or column) vector expression of the smallest coefficient * of each column (or row) of the referenced expression. * @@ -340,6 +354,19 @@ template class VectorwiseOp const NormReturnType norm() const { return NormReturnType(_expression()); } + /** \returns a row (or column) vector expression of the norm + * of each column (or row) of the referenced expression. + * This is a vector with real entries, even if the original matrix has complex entries. + * + * Example: \include PartialRedux_norm.cpp + * Output: \verbinclude PartialRedux_norm.out + * + * \sa DenseBase::norm() */ + EIGEN_DEVICE_FUNC + template + const typename LpNormReturnType

::Type lpNorm() const + { return typename LpNormReturnType

::Type(_expression()); } + /** \returns a row (or column) vector expression of the norm * of each column (or row) of the referenced expression, using diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp index 03f50bb5a..7ec57736c 100644 --- a/test/vectorwiseop.cpp +++ b/test/vectorwiseop.cpp @@ -191,6 +191,11 @@ template void vectorwiseop_matrix(const MatrixType& m) rcres = m1.rowwise().norm(); VERIFY_IS_APPROX(rcres(r), m1.row(r).norm()); + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().sum(), m1.colwise().template lpNorm<1>()); + VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().sum(), m1.rowwise().template lpNorm<1>()); + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().maxCoeff(), m1.colwise().template lpNorm()); + VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().maxCoeff(), m1.rowwise().template lpNorm()); + // test normalized m2 = m1.colwise().normalized(); VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized()); From 02e940fc9f55fffc69f0081781ede5949f9a37fc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 28 Sep 2015 11:55:36 +0200 Subject: [PATCH 030/344] bug #1071: improve doc on lpNorm and add example for some operator norms --- Eigen/src/Core/Dot.h | 8 +++++--- doc/TutorialReductionsVisitorsBroadcasting.dox | 13 ++++++++++++- ...orsBroadcasting_reductions_operatornorm.cpp | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 94b058466..003450f1a 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -178,9 +178,11 @@ struct lpNorm_selector } // end namespace internal -/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values - * of the coefficients of *this. 
If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ - * norm, that is the maximum of the absolute values of the coefficients of *this. +/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values + * of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ + * norm, that is the maximum of the absolute values of the coefficients of \c *this. + * + * \note For matrices, this function does not compute the operator-norm. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink. * * \sa norm() */ diff --git a/doc/TutorialReductionsVisitorsBroadcasting.dox b/doc/TutorialReductionsVisitorsBroadcasting.dox index eb6787dbc..908a1b4b2 100644 --- a/doc/TutorialReductionsVisitorsBroadcasting.dox +++ b/doc/TutorialReductionsVisitorsBroadcasting.dox @@ -32,7 +32,7 @@ Eigen also provides the \link MatrixBase::norm() norm() \endlink method, which r These operations can also operate on matrices; in that case, a n-by-p matrix is seen as a vector of size (n*p), so for example the \link MatrixBase::norm() norm() \endlink method returns the "Frobenius" or "Hilbert-Schmidt" norm. We refrain from speaking of the \f$\ell^2\f$ norm of a matrix because that can mean different things. -If you want other \f$\ell^p\f$ norms, use the \link MatrixBase::lpNorm() lpNorm

() \endlink method. The template parameter \a p can take the special value \a Infinity if you want the \f$\ell^\infty\f$ norm, which is the maximum of the absolute values of the coefficients. +If you want other coefficient-wise \f$\ell^p\f$ norms, use the \link MatrixBase::lpNorm() lpNorm

() \endlink method. The template parameter \a p can take the special value \a Infinity if you want the \f$\ell^\infty\f$ norm, which is the maximum of the absolute values of the coefficients. The following example demonstrates these methods. @@ -45,6 +45,17 @@ The following example demonstrates these methods. \verbinclude Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.out +\b Operator \b norm: The 1-norm and \f$\infty\f$-norm matrix operator norms can easily be computed as follows: + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.out +
+See below for more explanations on the syntax of these expressions. + \subsection TutorialReductionsVisitorsBroadcastingReductionsBool Boolean reductions The following reductions operate on boolean values: diff --git a/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp b/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp new file mode 100644 index 000000000..62e28fc31 --- /dev/null +++ b/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp @@ -0,0 +1,18 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + MatrixXf m(2,2); + m << 1,-2, + -3,4; + + cout << "1-norm(m) = " << m.cwiseAbs().colwise().sum().maxCoeff() + << " == " << m.colwise().lpNorm<1>().maxCoeff() << endl; + + cout << "infty-norm(m) = " << m.cwiseAbs().rowwise().sum().maxCoeff() + << " == " << m.rowwise().lpNorm<1>().maxCoeff() << endl; +} From ddb5650530e0b4edc9004b096973e65fecbddba1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 28 Sep 2015 15:07:03 +0200 Subject: [PATCH 031/344] bug #1070: propagate last three Matrix template arguments for NumTraits>::Real --- unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 8336c2644..bc641aef4 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -626,7 +626,8 @@ EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(acos, template struct NumTraits > : NumTraits< typename NumTraits::Real > { - typedef AutoDiffScalar::Real,DerType::RowsAtCompileTime,DerType::ColsAtCompileTime> > Real; + typedef AutoDiffScalar::Real,DerType::RowsAtCompileTime,DerType::ColsAtCompileTime, + DerType::Options, DerType::MaxRowsAtCompileTime, DerType::MaxColsAtCompileTime> > Real; typedef AutoDiffScalar NonInteger; typedef AutoDiffScalar& 
Nested; enum{ From ceafed519fe0f2004181dbc51dde7ec1f45c4f60 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 28 Sep 2015 16:56:11 +0200 Subject: [PATCH 032/344] Add support for permutation * homogenous --- Eigen/src/Geometry/Homogeneous.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h index e23758d86..4107fba4d 100644 --- a/Eigen/src/Geometry/Homogeneous.h +++ b/Eigen/src/Geometry/Homogeneous.h @@ -445,6 +445,11 @@ struct generic_product_impl, Homogeneous +struct permutation_matrix_product + : public permutation_matrix_product +{}; + } // end namespace internal } // end namespace Eigen From d46bacb6bb97048d23987693df92032386f60ab3 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 28 Sep 2015 10:40:41 -0700 Subject: [PATCH 033/344] Call numext::mini instead of std::min in several places. --- Eigen/src/Core/MathFunctions.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index c795149b8..45cff0c65 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -314,8 +314,6 @@ struct hypot_impl typedef typename NumTraits::Real RealScalar; static inline RealScalar run(const Scalar& x, const Scalar& y) { - EIGEN_USING_STD_MATH(max); - EIGEN_USING_STD_MATH(min); EIGEN_USING_STD_MATH(abs); EIGEN_USING_STD_MATH(sqrt); RealScalar _x = abs(x); @@ -607,8 +605,6 @@ struct random_default_impl { static inline Scalar run(const Scalar& x, const Scalar& y) { - using std::max; - using std::min; typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; if(y EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - EIGEN_USING_STD_MATH(min); EIGEN_USING_STD_MATH(abs); - return abs(x - y) <= (min)(abs(x), abs(y)) * prec; + return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec; } EIGEN_DEVICE_FUNC static 
inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) @@ -971,8 +966,7 @@ struct scalar_fuzzy_default_impl } static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - EIGEN_USING_STD_MATH(min); - return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec; + return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; } }; From 239c9946cd1f23b2f240399d43b827913f1d51ac Mon Sep 17 00:00:00 2001 From: Doug Kwan Date: Mon, 28 Sep 2015 14:26:10 -0700 Subject: [PATCH 034/344] Specified signedness of char type in test so that test passes consistently on different targets. --- test/rand.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/test/rand.cpp b/test/rand.cpp index 7c8068a3b..6790acf15 100644 --- a/test/rand.cpp +++ b/test/rand.cpp @@ -35,8 +35,8 @@ template void check_all_in_range(Scalar x, Scalar y) void test_rand() { long long_ref = NumTraits::highest()/10; - char char_offset = (std::min)(g_repeat,64); - char short_offset = (std::min)(g_repeat,16000); + signed char char_offset = (std::min)(g_repeat,64); + signed char short_offset = (std::min)(g_repeat,16000); for(int i = 0; i < g_repeat*10; i++) { CALL_SUBTEST(check_in_range(10,11)); @@ -57,13 +57,13 @@ void test_rand() CALL_SUBTEST(check_in_range(-long_ref,long_ref)); } - CALL_SUBTEST(check_all_in_range(11,11)); - CALL_SUBTEST(check_all_in_range(11,11+char_offset)); - CALL_SUBTEST(check_all_in_range(-5,5)); - CALL_SUBTEST(check_all_in_range(-11-char_offset,-11)); - CALL_SUBTEST(check_all_in_range(-126,-126+char_offset)); - CALL_SUBTEST(check_all_in_range(126-char_offset,126)); - CALL_SUBTEST(check_all_in_range(-126,126)); + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+char_offset)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-char_offset,-11)); + 
CALL_SUBTEST(check_all_in_range(-126,-126+char_offset)); + CALL_SUBTEST(check_all_in_range(126-char_offset,126)); + CALL_SUBTEST(check_all_in_range(-126,126)); CALL_SUBTEST(check_all_in_range(11,11)); CALL_SUBTEST(check_all_in_range(11,11+short_offset)); From b2b8c1d41efac41775cf64b6c39d2cabde9c59f7 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 29 Sep 2015 11:11:40 +0200 Subject: [PATCH 035/344] Fix performance regression in sparse * dense product where "sparse" is an expression --- Eigen/src/SparseCore/SparseDenseProduct.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 67b3c9c1b..3710e05a6 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -160,8 +160,8 @@ struct generic_product_impl template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - typedef typename nested_eval::type LhsNested; - typedef typename nested_eval::type RhsNested; + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; LhsNested lhsNested(lhs); RhsNested rhsNested(rhs); internal::sparse_time_dense_product(lhsNested, rhsNested, dst, alpha); @@ -182,8 +182,8 @@ struct generic_product_impl template static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - typedef typename nested_eval::type LhsNested; - typedef typename nested_eval::type RhsNested; + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; LhsNested lhsNested(lhs); RhsNested rhsNested(rhs); From 75a60d3ac0ac10be7317b268911d0098acbbf472 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 30 Sep 2015 11:44:02 +0200 Subject: [PATCH 036/344] bug #1075: fix AlignedBox::sample for runtime dimension --- Eigen/src/Geometry/AlignedBox.h | 2 +- test/geo_alignedbox.cpp | 2 ++ 2 files changed, 3 
insertions(+), 1 deletion(-) diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index 186d4ecad..03f1a11f8 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -163,7 +163,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * a uniform distribution */ inline VectorType sample() const { - VectorType r; + VectorType r(dim()); for(Index d=0; d(4)) ); } CALL_SUBTEST_12( specificTest1() ); CALL_SUBTEST_13( specificTest2() ); From deb261f64b9439f3a9a686b566c188b0d052e5b9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 2 Oct 2015 10:33:25 +0200 Subject: [PATCH 037/344] Make abs2 compatible with custom complex types --- Eigen/src/Core/MathFunctions.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 45cff0c65..19b7954a9 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -241,8 +241,8 @@ struct conj_retval * Implementation of abs2 * ****************************************************************************/ -template -struct abs2_impl +template +struct abs2_impl_default { typedef typename NumTraits::Real RealScalar; EIGEN_DEVICE_FUNC @@ -252,16 +252,28 @@ struct abs2_impl } }; -template -struct abs2_impl > +template +struct abs2_impl_default // IsComplex { + typedef typename NumTraits::Real RealScalar; EIGEN_DEVICE_FUNC - static inline RealScalar run(const std::complex& x) + static inline RealScalar run(const Scalar& x) { return real(x)*real(x) + imag(x)*imag(x); } }; +template +struct abs2_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return abs2_impl_default::IsComplex>::run(x); + } +}; + template struct abs2_retval { From 1879917d355a473401f27e903aa30422d4934c41 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 5 Oct 2015 16:18:22 +0200 
Subject: [PATCH 038/344] Propagate cmake generator --- cmake/language_support.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/language_support.cmake b/cmake/language_support.cmake index 93f8a8fd8..2f14f30b8 100644 --- a/cmake/language_support.cmake +++ b/cmake/language_support.cmake @@ -43,7 +43,7 @@ function(workaround_9220 language language_works) if(return_code EQUAL 0) # Second run execute_process ( - COMMAND ${CMAKE_COMMAND} . + COMMAND ${CMAKE_COMMAND} . -G "${CMAKE_GENERATOR}" WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/language_tests/${language} RESULT_VARIABLE return_code OUTPUT_QUIET From 6100d1ae64999b0cf0c37712765c2f558d1922a0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 11:32:02 +0200 Subject: [PATCH 039/344] Improve counting of sparse temporaries --- Eigen/src/SparseCore/SparseMatrix.h | 11 +++++++++-- test/sparse_ref.cpp | 5 +++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index f18829866..14b196480 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -665,8 +665,15 @@ class SparseMatrix YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) check_template_parameters(); const bool needToTranspose = (Flags & RowMajorBit) != (internal::evaluator::Flags & RowMajorBit); - if (needToTranspose) *this = other.derived(); - else internal::call_assignment_no_alias(*this, other.derived()); + if (needToTranspose) + *this = other.derived(); + else + { + #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN + #endif + internal::call_assignment_no_alias(*this, other.derived()); + } } /** Constructs a sparse matrix from the sparse selfadjoint view \a other */ diff --git a/test/sparse_ref.cpp b/test/sparse_ref.cpp index d173ee658..93fb5efe7 100644 --- a/test/sparse_ref.cpp +++ b/test/sparse_ref.cpp 
@@ -64,6 +64,7 @@ void call_ref() const SparseMatrix& Ac(A); Block > Ab(A,0,1, 3,3); const Block > Abc(A,0,1,3,3); + SparseMatrix AA = A*A; VERIFY_EVALUATION_COUNT( call_ref_1(A, A), 0); @@ -80,8 +81,8 @@ void call_ref() VERIFY_EVALUATION_COUNT( call_ref_3(B, B), 1); VERIFY_EVALUATION_COUNT( call_ref_2(B.transpose(), B.transpose()), 0); VERIFY_EVALUATION_COUNT( call_ref_3(B.transpose(), B.transpose()), 0); - VERIFY_EVALUATION_COUNT( call_ref_2(A*A, A*A), 1); - VERIFY_EVALUATION_COUNT( call_ref_3(A*A, A*A), 1); + VERIFY_EVALUATION_COUNT( call_ref_2(A*A, AA), 1); + VERIFY_EVALUATION_COUNT( call_ref_3(A*A, AA), 1); VERIFY(!C.isCompressed()); VERIFY_EVALUATION_COUNT( call_ref_3(C, C), 1); From 1b43860bc164056834c106378d35c9e62a228010 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 11:41:03 +0200 Subject: [PATCH 040/344] Make SparseVector derive from SparseCompressedBase, thus improving compatibility between sparse vectors and matrices --- Eigen/src/SparseCore/SparseCompressedBase.h | 71 ++++++++++++----- Eigen/src/SparseCore/SparseVector.h | 88 +++------------------ 2 files changed, 63 insertions(+), 96 deletions(-) diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index 0dbb94faf..ad3085a55 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -45,13 +45,14 @@ class SparseCompressedBase /** \returns the number of non zero coefficients */ inline Index nonZeros() const { - if(isCompressed()) + if(Derived::IsVectorAtCompileTime && outerIndexPtr()==0) + return derived().nonZeros(); + else if(isCompressed()) return outerIndexPtr()[derived().outerSize()]-outerIndexPtr()[0]; else if(derived().outerSize()==0) return 0; else return innerNonZeros().sum(); - } /** \returns a const pointer to the array of values. @@ -74,10 +75,12 @@ class SparseCompressedBase /** \returns a const pointer to the array of the starting positions of the inner vectors. 
* This function is aimed at interoperability with other libraries. + * \warning it returns the null pointer 0 for SparseVector * \sa valuePtr(), innerIndexPtr() */ inline const StorageIndex* outerIndexPtr() const { return derived().outerIndexPtr(); } /** \returns a non-const pointer to the array of the starting positions of the inner vectors. * This function is aimed at interoperability with other libraries. + * \warning it returns the null pointer 0 for SparseVector * \sa valuePtr(), innerIndexPtr() */ inline StorageIndex* outerIndexPtr() { return derived().outerIndexPtr(); } @@ -100,12 +103,27 @@ class SparseCompressedBase::InnerIterator { public: InnerIterator(const SparseCompressedBase& mat, Index outer) - : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer), m_id(mat.outerIndexPtr()[outer]) + : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer) { - if(mat.isCompressed()) - m_end = mat.outerIndexPtr()[outer+1]; + if(Derived::IsVectorAtCompileTime && mat.outerIndexPtr()==0) + { + m_id = 0; + m_end = mat.nonZeros(); + } else - m_end = m_id + mat.innerNonZeroPtr()[outer]; + { + m_id = mat.outerIndexPtr()[outer]; + if(mat.isCompressed()) + m_end = mat.outerIndexPtr()[outer+1]; + else + m_end = m_id + mat.innerNonZeroPtr()[outer]; + } + } + + InnerIterator(const SparseCompressedBase& mat) + : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(0), m_id(0), m_end(mat.nonZeros()) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); } inline InnerIterator& operator++() { m_id++; return *this; } @@ -114,16 +132,16 @@ class SparseCompressedBase::InnerIterator inline Scalar& valueRef() { return const_cast(m_values[m_id]); } inline StorageIndex index() const { return m_indices[m_id]; } - inline Index outer() const { return m_outer; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? 
index() : m_outer; } + inline Index outer() const { return m_outer.value(); } + inline Index row() const { return IsRowMajor ? m_outer.value() : index(); } + inline Index col() const { return IsRowMajor ? index() : m_outer.value(); } inline operator bool() const { return (m_id < m_end); } protected: const Scalar* m_values; const StorageIndex* m_indices; - const Index m_outer; + const internal::variable_if_dynamic m_outer; Index m_id; Index m_end; private: @@ -138,32 +156,45 @@ class SparseCompressedBase::ReverseInnerIterator { public: ReverseInnerIterator(const SparseCompressedBase& mat, Index outer) - : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer), m_start(mat.outerIndexPtr()[outer]) + : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer) { - if(mat.isCompressed()) - m_id = mat.outerIndexPtr()[outer+1]; + if(Derived::IsVectorAtCompileTime && mat.outerIndexPtr()==0) + { + m_start = 0; + m_id = mat.nonZeros(); + } else - m_id = m_start + mat.innerNonZeroPtr()[outer]; + { + m_start.value() = mat.outerIndexPtr()[outer]; + if(mat.isCompressed()) + m_id = mat.outerIndexPtr()[outer+1]; + else + m_id = m_start.value() + mat.innerNonZeroPtr()[outer]; + } } + ReverseInnerIterator(const SparseCompressedBase& mat) + : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(0), m_start(0), m_id(mat.nonZeros()) + {} + inline ReverseInnerIterator& operator--() { --m_id; return *this; } inline const Scalar& value() const { return m_values[m_id-1]; } inline Scalar& valueRef() { return const_cast(m_values[m_id-1]); } inline StorageIndex index() const { return m_indices[m_id-1]; } - inline Index outer() const { return m_outer; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? index() : m_outer; } + inline Index outer() const { return m_outer.value(); } + inline Index row() const { return IsRowMajor ? 
m_outer.value() : index(); } + inline Index col() const { return IsRowMajor ? index() : m_outer.value(); } - inline operator bool() const { return (m_id > m_start); } + inline operator bool() const { return (m_id > m_start.value()); } protected: const Scalar* m_values; const StorageIndex* m_indices; - const Index m_outer; + const internal::variable_if_dynamic m_outer; Index m_id; - const Index m_start; + const internal::variable_if_dynamic m_start; }; namespace internal { diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index ccf9364f2..38b5a53df 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -40,7 +40,7 @@ struct traits > ColsAtCompileTime = IsColVector ? 1 : Dynamic, MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, - Flags = _Options | NestByRefBit | LvalueBit | (IsColVector ? 0 : RowMajorBit), + Flags = _Options | NestByRefBit | LvalueBit | (IsColVector ? 0 : RowMajorBit) | CompressedAccessBit, CoeffReadCost = NumTraits::ReadCost, SupportedAccessPatterns = InnerRandomAccessPattern }; @@ -63,12 +63,12 @@ struct sparse_vector_assign_selector; template class SparseVector - : public SparseMatrixBase > + : public SparseCompressedBase > { - typedef SparseMatrixBase SparseBase; + typedef SparseCompressedBase Base; public: - EIGEN_SPARSE_PUBLIC_INTERFACE(SparseVector) + _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseVector) EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseVector, +=) EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseVector, -=) @@ -89,6 +89,11 @@ class SparseVector EIGEN_STRONG_INLINE const StorageIndex* innerIndexPtr() const { return &m_data.index(0); } EIGEN_STRONG_INLINE StorageIndex* innerIndexPtr() { return &m_data.index(0); } + + inline const StorageIndex* outerIndexPtr() const { return 0; } + inline StorageIndex* outerIndexPtr() { return 0; } + inline const StorageIndex* innerNonZeroPtr() const { return 0; } + inline StorageIndex* 
innerNonZeroPtr() { return 0; } /** \internal */ inline Storage& data() { return m_data; } @@ -126,8 +131,8 @@ class SparseVector public: - class InnerIterator; - class ReverseInnerIterator; + typedef typename Base::InnerIterator InnerIterator; + typedef typename Base::ReverseInnerIterator ReverseInnerIterator; inline void setZero() { m_data.clear(); } @@ -235,7 +240,7 @@ class SparseVector } inline SparseVector(const SparseVector& other) - : SparseBase(other), m_size(0) + : Base(other), m_size(0) { check_template_parameters(); *this = other.derived(); @@ -357,75 +362,6 @@ protected: Index m_size; }; -template -class SparseVector::InnerIterator -{ - public: - explicit InnerIterator(const SparseVector& vec, Index outer=0) - : m_data(vec.m_data), m_id(0), m_end(m_data.size()) - { - EIGEN_UNUSED_VARIABLE(outer); - eigen_assert(outer==0); - } - - explicit InnerIterator(const internal::CompressedStorage& data) - : m_data(data), m_id(0), m_end(m_data.size()) - {} - - inline InnerIterator& operator++() { m_id++; return *this; } - - inline Scalar value() const { return m_data.value(m_id); } - inline Scalar& valueRef() { return const_cast(m_data.value(m_id)); } - - inline StorageIndex index() const { return m_data.index(m_id); } - inline Index row() const { return IsColVector ? index() : 0; } - inline Index col() const { return IsColVector ? 
0 : index(); } - - inline operator bool() const { return (m_id < m_end); } - - protected: - const internal::CompressedStorage& m_data; - Index m_id; - const Index m_end; - private: - // If you get here, then you're not using the right InnerIterator type, e.g.: - // SparseMatrix A; - // SparseMatrix::InnerIterator it(A,0); - template InnerIterator(const SparseMatrixBase&,Index outer=0); -}; - -template -class SparseVector::ReverseInnerIterator -{ - public: - explicit ReverseInnerIterator(const SparseVector& vec, Index outer=0) - : m_data(vec.m_data), m_id(m_data.size()), m_start(0) - { - EIGEN_UNUSED_VARIABLE(outer); - eigen_assert(outer==0); - } - - explicit ReverseInnerIterator(const internal::CompressedStorage& data) - : m_data(data), m_id(m_data.size()), m_start(0) - {} - - inline ReverseInnerIterator& operator--() { m_id--; return *this; } - - inline Scalar value() const { return m_data.value(m_id-1); } - inline Scalar& valueRef() { return const_cast(m_data.value(m_id-1)); } - - inline StorageIndex index() const { return m_data.index(m_id-1); } - inline Index row() const { return IsColVector ? index() : 0; } - inline Index col() const { return IsColVector ? 
0 : index(); } - - inline operator bool() const { return (m_id > m_start); } - - protected: - const internal::CompressedStorage& m_data; - Index m_id; - const Index m_start; -}; - namespace internal { template From 9a070638de607a3bb46ae777da8cf9757f527fd8 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 11:53:19 +0200 Subject: [PATCH 041/344] Enable to view a SparseVector as a Ref --- Eigen/src/SparseCore/SparseMap.h | 11 +++++++++-- Eigen/src/SparseCore/SparseRef.h | 12 ++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h index 7c512d9fe..2ef4181bc 100644 --- a/Eigen/src/SparseCore/SparseMap.h +++ b/Eigen/src/SparseCore/SparseMap.h @@ -63,7 +63,7 @@ class SparseMapBase Index m_outerSize; Index m_innerSize; - Index m_nnz; + Array m_zero_nnz; IndexPointer m_outerIndex; IndexPointer m_innerIndices; ScalarPointer m_values; @@ -75,6 +75,7 @@ class SparseMapBase inline Index cols() const { return IsRowMajor ? 
m_innerSize : m_outerSize; } inline Index innerSize() const { return m_innerSize; } inline Index outerSize() const { return m_outerSize; } + inline Index nonZeros() const { return m_zero_nnz[1]; } bool isCompressed() const { return m_innerNonZeros==0; } @@ -107,10 +108,16 @@ class SparseMapBase inline SparseMapBase(Index rows, Index cols, Index nnz, IndexPointer outerIndexPtr, IndexPointer innerIndexPtr, ScalarPointer valuePtr, IndexPointer innerNonZerosPtr = 0) - : m_outerSize(IsRowMajor?rows:cols), m_innerSize(IsRowMajor?cols:rows), m_nnz(nnz), m_outerIndex(outerIndexPtr), + : m_outerSize(IsRowMajor?rows:cols), m_innerSize(IsRowMajor?cols:rows), m_zero_nnz(0,internal::convert_index(nnz)), m_outerIndex(outerIndexPtr), m_innerIndices(innerIndexPtr), m_values(valuePtr), m_innerNonZeros(innerNonZerosPtr) {} + // for vectors + inline SparseMapBase(Index size, Index nnz, IndexPointer innerIndexPtr, ScalarPointer valuePtr) + : m_outerSize(1), m_innerSize(size), m_zero_nnz(0,nnz), m_outerIndex(m_zero_nnz.data()), + m_innerIndices(innerIndexPtr), m_values(valuePtr), m_innerNonZeros(0) + {} + /** Empty destructor */ inline ~SparseMapBase() {} }; diff --git a/Eigen/src/SparseCore/SparseRef.h b/Eigen/src/SparseCore/SparseRef.h index 8df62a119..08268b9e1 100644 --- a/Eigen/src/SparseCore/SparseRef.h +++ b/Eigen/src/SparseCore/SparseRef.h @@ -72,6 +72,18 @@ protected: { ::new (static_cast(this)) Base(expr.rows(), expr.cols(), expr.nonZeros(), expr.outerIndexPtr(), expr.innerIndexPtr(), expr.valuePtr(), expr.innerNonZeroPtr()); } + + template + void construct(const SparseVector& expr) + { + ::new (static_cast(this)) Base(expr.size(), expr.nonZeros(), expr.innerIndexPtr(), expr.valuePtr()); + } + + template + void construct(SparseVector& expr) + { + ::new (static_cast(this)) Base(expr.size(), expr.nonZeros(), expr.innerIndexPtr(), expr.valuePtr()); + } }; } // namespace internal From 945b80c83e3bb7320667e8a9a45fc4912af85c79 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: 
Tue, 6 Oct 2015 11:57:03 +0200 Subject: [PATCH 042/344] Optimize Ref by removing useless default initialisation of SparseMapBase and SparseMatrix --- Eigen/src/SparseCore/SparseMap.h | 6 ++++++ Eigen/src/SparseCore/SparseRef.h | 12 +++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h index 2ef4181bc..058a6d62b 100644 --- a/Eigen/src/SparseCore/SparseMap.h +++ b/Eigen/src/SparseCore/SparseMap.h @@ -120,6 +120,9 @@ class SparseMapBase /** Empty destructor */ inline ~SparseMapBase() {} + + protected: + inline SparseMapBase() {} }; template @@ -172,6 +175,9 @@ class SparseMapBase /** Empty destructor */ inline ~SparseMapBase() {} + + protected: + inline SparseMapBase() {} }; template diff --git a/Eigen/src/SparseCore/SparseRef.h b/Eigen/src/SparseCore/SparseRef.h index 08268b9e1..fbd489a13 100644 --- a/Eigen/src/SparseCore/SparseRef.h +++ b/Eigen/src/SparseCore/SparseRef.h @@ -182,8 +182,9 @@ class Ref, Options, StrideType { if((Options & int(StandardCompressedFormat)) && (!expr.isCompressed())) { - m_object = expr; - Base::construct(m_object); + TPlainObjectType* obj = reinterpret_cast(m_object_bytes); + ::new (obj) TPlainObjectType(expr); + Base::construct(*obj); } else { @@ -194,12 +195,13 @@ class Ref, Options, StrideType template void construct(const Expression& expr, internal::false_type) { - m_object = expr; - Base::construct(m_object); + TPlainObjectType* obj = reinterpret_cast(m_object_bytes); + ::new (obj) TPlainObjectType(expr); + Base::construct(*obj); } protected: - TPlainObjectType m_object; + char m_object_bytes[sizeof(TPlainObjectType)]; }; From f25bdc707feb29895f2123d0dcf2b3fb1d150e67 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 11:59:08 +0200 Subject: [PATCH 043/344] Optimise assignment into a Block by using Ref and avoiding useless updates in non-compressed mode. This make row-by-row filling of a row-major sparse matrix very efficient. 
--- Eigen/src/SparseCore/SparseBlock.h | 66 +++++++++++++++++++----------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 9afb5327e..202bf9a12 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -114,7 +114,8 @@ public: // and/or it is not at the end of the nonzeros of the underlying matrix. // 1 - eval to a temporary to avoid transposition and/or aliasing issues - SparseMatrix tmp(other); + Ref > tmp(other.derived()); + eigen_internal_assert(tmp.outerSize()==m_outerSize.value()); // 2 - let's check whether there is enough allocated memory Index nnz = tmp.nonZeros(); @@ -127,6 +128,7 @@ public: ? Index(matrix.data().allocatedSize()) + block_size : block_size; + bool update_trailing_pointers = false; if(nnz>free_size) { // realloc manually to reduce copies @@ -135,8 +137,8 @@ public: internal::smart_copy(&m_matrix.data().value(0), &m_matrix.data().value(0) + start, &newdata.value(0)); internal::smart_copy(&m_matrix.data().index(0), &m_matrix.data().index(0) + start, &newdata.index(0)); - internal::smart_copy(&tmp.data().value(0), &tmp.data().value(0) + nnz, &newdata.value(start)); - internal::smart_copy(&tmp.data().index(0), &tmp.data().index(0) + nnz, &newdata.index(start)); + internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, &newdata.value(start)); + internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, &newdata.index(start)); internal::smart_copy(&matrix.data().value(end), &matrix.data().value(end) + tail_size, &newdata.value(start+nnz)); internal::smart_copy(&matrix.data().index(end), &matrix.data().index(end) + tail_size, &newdata.index(start+nnz)); @@ -144,35 +146,53 @@ public: newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz); matrix.data().swap(newdata); + + update_trailing_pointers = true; } else { - // no need to realloc, simply copy the tail at its respective position 
and insert tmp - matrix.data().resize(start + nnz + tail_size); + if(m_matrix.isCompressed()) + { + // no need to realloc, simply copy the tail at its respective position and insert tmp + matrix.data().resize(start + nnz + tail_size); - internal::smart_memmove(&matrix.data().value(end), &matrix.data().value(end) + tail_size, &matrix.data().value(start + nnz)); - internal::smart_memmove(&matrix.data().index(end), &matrix.data().index(end) + tail_size, &matrix.data().index(start + nnz)); + internal::smart_memmove(&matrix.data().value(end), &matrix.data().value(end) + tail_size, &matrix.data().value(start + nnz)); + internal::smart_memmove(&matrix.data().index(end), &matrix.data().index(end) + tail_size, &matrix.data().index(start + nnz)); - internal::smart_copy(&tmp.data().value(0), &tmp.data().value(0) + nnz, &matrix.data().value(start)); - internal::smart_copy(&tmp.data().index(0), &tmp.data().index(0) + nnz, &matrix.data().index(start)); + update_trailing_pointers = true; + } + + internal::smart_copy(tmp.valuePtr(), tmp.valuePtr() + nnz, &matrix.data().value(start)); + internal::smart_copy(tmp.innerIndexPtr(), tmp.innerIndexPtr() + nnz, &matrix.data().index(start)); } - - // update innerNonZeros - if(!m_matrix.isCompressed()) - for(Index j=0; j(nnz - block_size); - for(Index k = m_outerStart + m_outerSize.value(); k<=matrix.outerSize(); ++k) + else { - matrix.outerIndexPtr()[k] += offset; + StorageIndex p = StorageIndex(start); + for(Index k=0; k(nnz - block_size); + for(Index k = m_outerStart + m_outerSize.value(); k<=matrix.outerSize(); ++k) + { + matrix.outerIndexPtr()[k] += offset; + } } return derived(); @@ -289,7 +309,7 @@ private: template BlockImpl(const SparseMatrixBase& xpr, Index i); template BlockImpl(const SparseMatrixBase& xpr); }; - + //---------- /** \returns the \a outer -th column (resp. 
row) of the matrix \c *this if \c *this From 752a0e5339f7e624a25cbf00da818389fa235bb3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 13:25:45 +0200 Subject: [PATCH 044/344] bug #1076: fix scaling in IncompleteCholesky, improve doc, add read-only access to the different factors, remove debugging code. --- .../src/IterativeSolvers/IncompleteCholesky.h | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h index 2e2d9a851..388e6bfaa 100644 --- a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h +++ b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h @@ -24,6 +24,11 @@ namespace Eigen { * matrix. It is advised to give a row-oriented sparse matrix * \tparam _UpLo The triangular part of the matrix to reference. * \tparam _OrderingType + * + * It performs the following incomplete factorization: \f$ S P A P' S \approx L L' \f$ + * where L is a lower triangular factor, S if a diagonal scaling matrix, and P is a + * fill-in reducing permutation as computed of the ordering method. + * */ template > @@ -86,6 +91,7 @@ class IncompleteCholesky : public SparseSolverBase0) m_perm = pinv.inverse(); else m_perm.resize(0); m_analysisIsOk = true; + m_isInitialized = true; } template @@ -110,9 +116,17 @@ class IncompleteCholesky : public SparseSolverBase colPtr, Ref rowIdx, Ref vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol); }; @@ -176,13 +190,21 @@ void IncompleteCholesky::factorize(const _MatrixType } m_scale = m_scale.cwiseSqrt().cwiseSqrt(); + + for (Index j = 0; j < n; ++j) + if(m_scale(j)>(std::numeric_limits::min)()) + m_scale(j) = RealScalar(1)/m_scale(j); + else + m_scale(j) = 1; + + // FIXME disable scaling if not needed, i.e., if it is roughtly uniform? 
(this will make solve() faster) // Scale and compute the shift for the matrix RealScalar mindiag = NumTraits::highest(); for (Index j = 0; j < n; j++) { for (Index k = colPtr[j]; k < colPtr[j+1]; k++) - vals[k] /= (m_scale(j)*m_scale(rowIdx[k])); + vals[k] *= (m_scale(j)*m_scale(rowIdx[k])); eigen_internal_assert(rowIdx[colPtr[j]]==j && "IncompleteCholesky: only the lower triangular part must be stored"); mindiag = numext::mini(numext::real(vals[colPtr[j]]), mindiag); } @@ -240,7 +262,6 @@ void IncompleteCholesky::factorize(const _MatrixType // Scale the current column if(numext::real(diag) <= 0) { - std::cerr << "\nNegative diagonal during Incomplete factorization at position " << j << " (value = " << diag << ")\n"; m_info = NumericalIssue; return; } @@ -276,7 +297,6 @@ void IncompleteCholesky::factorize(const _MatrixType updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol); } m_factorizationIsOk = true; - m_isInitialized = true; m_info = Success; } From 2d287a4898085252527ba07280ed6bd48b33afcb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 15:09:04 +0200 Subject: [PATCH 045/344] Fix Ref for Transpose --- Eigen/src/SparseCore/SparseMap.h | 7 ++++++- Eigen/src/SparseCore/SparseRef.h | 18 ++++-------------- Eigen/src/SparseCore/SparseTranspose.h | 4 +++- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h index 058a6d62b..349927905 100644 --- a/Eigen/src/SparseCore/SparseMap.h +++ b/Eigen/src/SparseCore/SparseMap.h @@ -114,7 +114,7 @@ class SparseMapBase // for vectors inline SparseMapBase(Index size, Index nnz, IndexPointer innerIndexPtr, ScalarPointer valuePtr) - : m_outerSize(1), m_innerSize(size), m_zero_nnz(0,nnz), m_outerIndex(m_zero_nnz.data()), + : m_outerSize(1), m_innerSize(size), m_zero_nnz(0,internal::convert_index(nnz)), m_outerIndex(m_zero_nnz.data()), m_innerIndices(innerIndexPtr), m_values(valuePtr), m_innerNonZeros(0) {} @@ -173,6 +173,11 @@ class 
SparseMapBase : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZerosPtr) {} + // for vectors + inline SparseMapBase(Index size, Index nnz, StorageIndex* innerIndexPtr, Scalar* valuePtr) + : Base(size, nnz, innerIndexPtr, valuePtr) + {} + /** Empty destructor */ inline ~SparseMapBase() {} diff --git a/Eigen/src/SparseCore/SparseRef.h b/Eigen/src/SparseCore/SparseRef.h index fbd489a13..262fd9aef 100644 --- a/Eigen/src/SparseCore/SparseRef.h +++ b/Eigen/src/SparseCore/SparseRef.h @@ -66,23 +66,13 @@ public: protected: - template void construct(Expression& expr) { - ::new (static_cast(this)) Base(expr.rows(), expr.cols(), expr.nonZeros(), expr.outerIndexPtr(), expr.innerIndexPtr(), expr.valuePtr(), expr.innerNonZeroPtr()); - } - - template - void construct(const SparseVector& expr) - { - ::new (static_cast(this)) Base(expr.size(), expr.nonZeros(), expr.innerIndexPtr(), expr.valuePtr()); - } - - template - void construct(SparseVector& expr) - { - ::new (static_cast(this)) Base(expr.size(), expr.nonZeros(), expr.innerIndexPtr(), expr.valuePtr()); + if(expr.outerIndexPtr()==0) + ::new (static_cast(this)) Base(expr.size(), expr.nonZeros(), expr.innerIndexPtr(), expr.valuePtr()); + else + ::new (static_cast(this)) Base(expr.rows(), expr.cols(), expr.nonZeros(), expr.outerIndexPtr(), expr.innerIndexPtr(), expr.valuePtr(), expr.innerNonZeroPtr()); } }; diff --git a/Eigen/src/SparseCore/SparseTranspose.h b/Eigen/src/SparseCore/SparseTranspose.h index c2d4ac549..b6f180a41 100644 --- a/Eigen/src/SparseCore/SparseTranspose.h +++ b/Eigen/src/SparseCore/SparseTranspose.h @@ -27,12 +27,14 @@ namespace internal { using Base::derived; typedef typename Base::Scalar Scalar; typedef typename Base::StorageIndex StorageIndex; + + inline Index nonZeros() const { return derived().nestedExpression().nonZeros(); } inline const Scalar* valuePtr() const { return derived().nestedExpression().valuePtr(); } inline const StorageIndex* innerIndexPtr() const { return 
derived().nestedExpression().innerIndexPtr(); } inline const StorageIndex* outerIndexPtr() const { return derived().nestedExpression().outerIndexPtr(); } inline const StorageIndex* innerNonZeroPtr() const { return derived().nestedExpression().innerNonZeroPtr(); } - + inline Scalar* valuePtr() { return derived().nestedExpression().valuePtr(); } inline StorageIndex* innerIndexPtr() { return derived().nestedExpression().innerIndexPtr(); } inline StorageIndex* outerIndexPtr() { return derived().nestedExpression().outerIndexPtr(); } From 2c676ddb40ebe380312795770fcf9fee63f63b2c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 15:43:27 +0200 Subject: [PATCH 046/344] Handle various TODOs in SSE vectorization (remove splitted storeu, enable SSE3 integer vectorization, plus minor tweaks) --- Eigen/src/Core/arch/SSE/Complex.h | 4 +-- Eigen/src/Core/arch/SSE/PacketMath.h | 52 +++++++--------------------- 2 files changed, 14 insertions(+), 42 deletions(-) diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 2a44b6272..4f45ddfbf 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -67,7 +67,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { - // TODO optimize it for SSE3 and 4 #ifdef EIGEN_VECTORIZE_SSE3 return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v), _mm_mul_ps(_mm_movehdup_ps(a.v), @@ -310,9 +309,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { - // TODO optimize it for SSE3 and 4 #ifdef EIGEN_VECTORIZE_SSE3 - return Packet1cd(_mm_addsub_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), + return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v), _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), vec2d_swizzle1(b.v, 1, 0)))); #else diff --git 
a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 7eb7278af..e7b676f4c 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -135,7 +135,6 @@ template<> struct packet_traits : default_packet_traits typedef Packet4i type; typedef Packet4i half; enum { - // FIXME check the Has* Vectorizable = 1, AlignedOnScalar = 1, size=4, @@ -223,10 +222,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, const Packet4i& /*b*/) -{ eigen_assert(false && "packet integer division are not supported by SSE"); - return pset1(0); -} // for some weird raisons, it has to be overloaded for packet of integers template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } @@ -287,8 +282,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { E #if (EIGEN_COMP_MSVC==1600) // NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps // (i.e., it does not generate an unaligned load!! - // TODO On most architectures this version should also be faster than a single _mm_loadu_ps - // so we could also enable it for MSVC08 but first we have to make this later does not generate crap when doing so... 
__m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from)); res = _mm_loadh_pi(res, (const __m64*)(from+2)); return res; @@ -299,24 +292,16 @@ template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { E template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); } template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast(from)); } #else -// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would -// require pointer casting to incompatible pointer types and leads to invalid code -// because of the strict aliasing rule. The "dummy" stuff are required to enforce -// a correct instruction dependency. -// TODO: do the same for MSVC (ICC is compatible) // NOTE: with the code below, MSVC's compiler crashes! #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386 || (EIGEN_ARCH_x86_64 && EIGEN_GNUC_AT_LEAST(4, 8))) // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 - #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1 #elif EIGEN_COMP_CLANG // bug 201: Segfaults in __mm_loadh_pd with clang 2.8 #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 - #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0 #else #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0 - #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0 #endif template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) @@ -374,17 +359,9 @@ template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& f template<> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); } template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); } -template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& from) { - EIGEN_DEBUG_UNALIGNED_STORE -#if 
EIGEN_AVOID_CUSTOM_UNALIGNED_STORES - _mm_storeu_pd(to, from); -#else - _mm_storel_pd((to), from); - _mm_storeh_pd((to+1), from); -#endif -} -template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast(to), Packet2d(_mm_castps_pd(from))); } -template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast(to), Packet2d(_mm_castsi128_pd(from))); } +template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); } +template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, Index stride) { @@ -547,7 +524,6 @@ EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs) } #ifdef EIGEN_VECTORIZE_SSE3 -// TODO implement SSE2 versions as well as integer versions template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) { return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3])); @@ -556,11 +532,10 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) { return _mm_hadd_pd(vecs[0], vecs[1]); } -// SSSE3 version: -// EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) -// { -// return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3])); -// } +template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) +{ + return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3])); +} template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) { @@ -570,12 +545,11 @@ template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) template<> EIGEN_STRONG_INLINE 
double predux(const Packet2d& a) { return pfirst(_mm_hadd_pd(a, a)); } -// SSSE3 version: -// EIGEN_STRONG_INLINE float predux(const Packet4i& a) -// { -// Packet4i tmp0 = _mm_hadd_epi32(a,a); -// return pfirst(_mm_hadd_epi32(tmp0, tmp0)); -// } +template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) +{ + Packet4i tmp0 = _mm_hadd_epi32(a,a); + return pfirst(_mm_hadd_epi32(tmp0,tmp0)); +} #else // SSE2 versions template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) @@ -606,7 +580,6 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) { return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1])); } -#endif // SSE3 template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) { @@ -627,6 +600,7 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) tmp0 = _mm_unpackhi_epi64(tmp0, tmp1); return _mm_add_epi32(tmp0, tmp2); } +#endif // SSE3 // Other reduction functions: From fb51bab27295faa1b9ec98bbdcee667f22465c92 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 17:14:56 +0200 Subject: [PATCH 047/344] Some cleaning --- Eigen/src/Core/AssignEvaluator.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index c4ba60d6d..f94539094 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -565,9 +565,6 @@ public: EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } - // TODO get rid of this one: - EIGEN_DEVICE_FUNC DstXprType& dstExpression() const { return m_dstExpr; } - EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } @@ -737,11 +734,9 @@ template EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) { enum { - 
NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) - | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". - // revert to || as soon as not needed anymore. - (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) - && int(Dst::SizeAtCompileTime) != 1 + NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) + || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) + ) && int(Dst::SizeAtCompileTime) != 1 }; Index dstRows = NeedToTranspose ? src.cols() : src.rows(); From 26cde4db3c600f3f1ef635305be76547ab032189 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 17:18:06 +0200 Subject: [PATCH 048/344] Define Permutation*<>::Scalar to 'void', re-enable scalar type compatibility check in assignment while relaxing this test for void types. --- Eigen/src/Core/AssignEvaluator.h | 6 +----- Eigen/src/Core/PermutationMatrix.h | 5 ++++- Eigen/src/Core/util/XprHelper.h | 7 ++++++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index f94539094..a02104bb0 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -751,11 +751,7 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const // TODO check whether this is the right place to perform these checks: EIGEN_STATIC_ASSERT_LVALUE(Dst) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) - - // TODO this line is commented to allow matrix = permutation - // Actually, the "Scalar" type for a permutation matrix does not really make sense, - // perhaps it could be void, and EIGEN_CHECK_BINARY_COMPATIBILIY could allow micing void with anything...? 
-// EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); + EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); Assignment::run(actualDst, src, func); } diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index bfe6f899a..b5f7e4a54 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -69,6 +69,7 @@ class PermutationBase : public EigenBase PlainPermutationType; using Base::derived; typedef Transpose TransposeReturnType; + typedef void Scalar; #endif /** Copies the other permutation into *this */ @@ -303,6 +304,7 @@ struct traits IndicesType; typedef _StorageIndex StorageIndex; + typedef void Scalar; }; } @@ -426,6 +428,7 @@ struct traits, _PacketAccess> IndicesType; typedef _StorageIndex StorageIndex; + typedef void Scalar; }; } @@ -499,7 +502,7 @@ template struct traits > { typedef PermutationStorage StorageKind; - typedef typename _IndicesType::Scalar Scalar; + typedef void Scalar; typedef typename _IndicesType::Scalar StorageIndex; typedef _IndicesType IndicesType; enum { diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index c31cd4801..052f824ae 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -654,6 +654,11 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if struct is_same_or_void { enum { value = is_same::value }; }; +template struct is_same_or_void { enum { value = 1 }; }; +template struct is_same_or_void { enum { value = 1 }; }; +template<> struct is_same_or_void { enum { value = 1 }; }; + } // end namespace internal // we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor @@ -666,7 +671,7 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if::ret \ ? 
int(internal::scalar_product_traits::Defined) \ - : int(internal::is_same::value)), \ + : int(internal::is_same_or_void::value)), \ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) } // end namespace Eigen From 69a7897e7288cd06ff2997caf007e743343dc29d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 17:21:24 +0200 Subject: [PATCH 049/344] Fix storage index type in empty permutations --- Eigen/src/SparseCore/SparseSelfAdjointView.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index b0c2e472e..97e7293c7 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -137,14 +137,14 @@ template class SparseSelfAdjointView SparseSelfAdjointView& operator=(const SparseSelfAdjointView& src) { - PermutationMatrix pnull; + PermutationMatrix pnull; return *this = src.twistedBy(pnull); } template SparseSelfAdjointView& operator=(const SparseSelfAdjointView& src) { - PermutationMatrix pnull; + PermutationMatrix pnull; return *this = src.twistedBy(pnull); } From 2e0ece7b66ddadb29db2a255a55ee157279eb037 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 17:22:12 +0200 Subject: [PATCH 050/344] Fix wrong casting syntax --- test/array.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/array.cpp b/test/array.cpp index 9f61c4b26..e2b20f9e9 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -247,7 +247,7 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1))); VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1))); VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); - VERIFY_IS_APPROX(arg(m1), ((ArrayType)(m1<0))*std::acos(-1.0)); + VERIFY_IS_APPROX(arg(m1), ((m1<0).template cast())*std::acos(-1.0)); VERIFY((round(m1) <= ceil(m1) 
&& round(m1) >= floor(m1)).all()); VERIFY((Eigen::isnan)((m1*0.0)/0.0).all()); VERIFY((Eigen::isinf)(m4/0.0).all()); From 27a94299aaf38be8510253235751fe55bb3e3f6f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 17:23:11 +0200 Subject: [PATCH 051/344] Add sparse vector to Ref conversion unit tests, and improve output of sparse_ref unit test in case of failure. --- test/sparse_ref.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/sparse_ref.cpp b/test/sparse_ref.cpp index 93fb5efe7..d2d475616 100644 --- a/test/sparse_ref.cpp +++ b/test/sparse_ref.cpp @@ -26,7 +26,7 @@ inline void on_temporary_creation() { #define VERIFY_EVALUATION_COUNT(XPR,N) {\ nb_temporaries = 0; \ - XPR; \ + CALL_SUBTEST( XPR ); \ if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \ VERIFY( (#XPR) && nb_temporaries==N ); \ } @@ -64,6 +64,8 @@ void call_ref() const SparseMatrix& Ac(A); Block > Ab(A,0,1, 3,3); const Block > Abc(A,0,1,3,3); + SparseVector vc = VectorXf::Random(10).sparseView(0.5,1); + SparseVector vr = VectorXf::Random(10).sparseView(0.5,1); SparseMatrix AA = A*A; @@ -104,6 +106,9 @@ void call_ref() VERIFY_EVALUATION_COUNT( call_ref_2(A.middleCols(1,3), A.middleCols(1,3)), 0); VERIFY_EVALUATION_COUNT( call_ref_2(A.col(2), A.col(2)), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(vc, vc), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(vr.transpose(), vr.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(vr, vr.transpose()), 0); VERIFY_EVALUATION_COUNT( call_ref_2(A.block(1,1,3,3), A.block(1,1,3,3)), 1); // should be 0 (allocate starts/nnz only) } From 8bb51a87f7025d5133199ec59f6ca981d5789622 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 6 Oct 2015 17:24:01 +0200 Subject: [PATCH 052/344] Re-enable some invalid scalar type conversion checks by disabling explicit vectorization --- test/mixingtypes.cpp | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git 
a/test/mixingtypes.cpp b/test/mixingtypes.cpp index 71f099bb8..32d9d0be9 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // Copyright (C) 2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -15,9 +15,13 @@ #define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them #endif -// #ifndef EIGEN_DONT_VECTORIZE -// #define EIGEN_DONT_VECTORIZE // SSE intrinsics aren't designed to allow mixing types -// #endif +#if defined(EIGEN_TEST_PART_1) || defined(EIGEN_TEST_PART_2) || defined(EIGEN_TEST_PART_3) + +#ifndef EIGEN_DONT_VECTORIZE +#define EIGEN_DONT_VECTORIZE +#endif + +#endif #include "main.h" @@ -56,10 +60,12 @@ template void mixingtypes(int size = SizeAtCompileType) // this one does not even compile with C++11 VERIFY_RAISES_ASSERT(mf+mcf); #endif - // the following do not even compile since the introduction of evaluators -// VERIFY_RAISES_ASSERT(vf=vd); -// VERIFY_RAISES_ASSERT(vf+=vd); -// VERIFY_RAISES_ASSERT(mcd=md); + +#ifdef EIGEN_DONT_VECTORIZE + VERIFY_RAISES_ASSERT(vf=vd); + VERIFY_RAISES_ASSERT(vf+=vd); + VERIFY_RAISES_ASSERT(mcd=md); +#endif // check scalar products VERIFY_IS_APPROX(vcf * sf , vcf * complex(sf)); @@ -79,6 +85,7 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX(vcd.asDiagonal() * md, vcd.asDiagonal() * md.template cast >()); VERIFY_IS_APPROX(mcf * vf.asDiagonal(), mcf * vf.template cast >().asDiagonal()); VERIFY_IS_APPROX(md * vcd.asDiagonal(), md.template cast >() * vcd.asDiagonal()); + // vd.asDiagonal() * mf; // does not even compile // vcd.asDiagonal() * mf; // does not even compile @@ -148,5 +155,9 @@ void test_mixingtypes() CALL_SUBTEST_1(mixingtypes<3>()); CALL_SUBTEST_2(mixingtypes<4>()); 
CALL_SUBTEST_3(mixingtypes(internal::random(1,EIGEN_TEST_MAX_SIZE))); + + CALL_SUBTEST_4(mixingtypes<3>()); + CALL_SUBTEST_5(mixingtypes<4>()); + CALL_SUBTEST_6(mixingtypes(internal::random(1,EIGEN_TEST_MAX_SIZE))); } } From ca0dd7ae26cfbfdc16f23b46a016f401e3db4e5c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 7 Oct 2015 15:36:12 +0200 Subject: [PATCH 053/344] Fix implicit cast in unit test --- test/eigensolver_selfadjoint.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp index 41b6d99ab..4c3de7a80 100644 --- a/test/eigensolver_selfadjoint.cpp +++ b/test/eigensolver_selfadjoint.cpp @@ -130,13 +130,13 @@ template void selfadjointeigensolver(const MatrixType& m) Tridiagonalization tridiag(symmC); VERIFY_IS_APPROX(tridiag.diagonal(), tridiag.matrixT().diagonal()); VERIFY_IS_APPROX(tridiag.subDiagonal(), tridiag.matrixT().template diagonal<-1>()); - MatrixType T = tridiag.matrixT(); + Matrix T = tridiag.matrixT(); if(rows>1 && cols>1) { // FIXME check that upper and lower part are 0: //VERIFY(T.topRightCorner(rows-2, cols-2).template triangularView().isZero()); } - VERIFY_IS_APPROX(tridiag.diagonal(), T.diagonal().real()); - VERIFY_IS_APPROX(tridiag.subDiagonal(), T.template diagonal<1>().real()); + VERIFY_IS_APPROX(tridiag.diagonal(), T.diagonal()); + VERIFY_IS_APPROX(tridiag.subDiagonal(), T.template diagonal<1>()); VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView()), tridiag.matrixQ() * tridiag.matrixT().eval() * MatrixType(tridiag.matrixQ()).adjoint()); VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView()), tridiag.matrixQ() * tridiag.matrixT() * tridiag.matrixQ().adjoint()); From 41cc1f9033e7a316834b409eb2c6db69fd5de56d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 7 Oct 2015 15:41:22 +0200 Subject: [PATCH 054/344] Remove debuging prod() and lazyprod() function, plus some cleaning in noalias assignment --- 
Eigen/src/Core/AssignEvaluator.h | 6 ------ Eigen/src/Core/Product.h | 23 ----------------------- Eigen/src/Core/ProductEvaluators.h | 19 +++++++++---------- test/evaluators.cpp | 22 ++++++++++++++++++++++ 4 files changed, 31 insertions(+), 39 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index a02104bb0..f4e92a808 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -716,14 +716,8 @@ EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& fun } // by-pass AssumeAliasing -// FIXME the const version should probably not be needed // When there is no aliasing, we require that 'dst' has been properly resized template class StorageBase, typename Src, typename Func> -EIGEN_DEVICE_FUNC void call_assignment(const NoAlias& dst, const Src& src, const Func& func) -{ - call_assignment_no_alias(dst.expression(), src, func); -} -template class StorageBase, typename Src, typename Func> EIGEN_DEVICE_FUNC void call_assignment(NoAlias& dst, const Src& src, const Func& func) { call_assignment_no_alias(dst.expression(), src, func); diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index b79236f15..fdd2fed3f 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -217,29 +217,6 @@ class ProductImpl }; -/*************************************************************************** -* Implementation of matrix base methods -***************************************************************************/ - - -/** \internal used to test the evaluator only - */ -template -const Product -prod(const Lhs& lhs, const Rhs& rhs) -{ - return Product(lhs,rhs); -} - -/** \internal used to test the evaluator only - */ -template -const Product -lazyprod(const Lhs& lhs, const Rhs& rhs) -{ - return Product(lhs,rhs); -} - } // end namespace Eigen #endif // EIGEN_PRODUCT_H diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 
04e5e5e37..6e1be1227 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -177,8 +177,7 @@ struct Assignment > SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) { - // TODO use operator* instead of prod() once we have made enough progress - call_assignment(dst.noalias(), prod(src.functor().m_other * src.nestedExpression().lhs(), src.nestedExpression().rhs()), func); + call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func); } }; @@ -329,28 +328,28 @@ struct generic_product_impl template static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - // TODO: use the following instead of calling call_assignment, same for the other methods - // dst = lazyprod(lhs,rhs); - call_assignment(dst, lazyprod(lhs,rhs), internal::assign_op()); + // Same as: dst.noalias() = lhs.lazyProduct(rhs); + // but easier on the compiler side + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); } template static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - // dst += lazyprod(lhs,rhs); - call_assignment(dst, lazyprod(lhs,rhs), internal::add_assign_op()); + // dst.noalias() += lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); } template static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - // dst -= lazyprod(lhs,rhs); - call_assignment(dst, lazyprod(lhs,rhs), internal::sub_assign_op()); + // dst.noalias() -= lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); } // template // static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) -// { dst += alpha * lazyprod(lhs,rhs); } +// { dst.noalias() += alpha * lhs.lazyProduct(rhs); } }; // This specialization enforces the use of a coefficient-based evaluation strategy diff --git 
a/test/evaluators.cpp b/test/evaluators.cpp index f41968da8..12dc1ffef 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -2,6 +2,20 @@ #include "main.h" namespace Eigen { + + template + const Product + prod(const Lhs& lhs, const Rhs& rhs) + { + return Product(lhs,rhs); + } + + template + const Product + lazyprod(const Lhs& lhs, const Rhs& rhs) + { + return Product(lhs,rhs); + } template EIGEN_STRONG_INLINE @@ -69,6 +83,14 @@ namespace Eigen { typedef typename DstXprType::Scalar Scalar; call_assignment(dst.const_cast_derived(), src.const_cast_derived(), internal::swap_assign_op()); } + + namespace internal { + template class StorageBase, typename Src, typename Func> + EIGEN_DEVICE_FUNC void call_assignment(const NoAlias& dst, const Src& src, const Func& func) + { + call_assignment_no_alias(dst.expression(), src, func); + } + } } From aba1eda71e8743454175fc315f3c0c2454e54291 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 7 Oct 2015 15:44:12 +0200 Subject: [PATCH 055/344] Help clang to inline some functions, thus fixing some regressions --- Eigen/src/Core/AssignEvaluator.h | 22 +++++++++++----------- Eigen/src/Core/ProductEvaluators.h | 4 +--- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index f4e92a808..a5ea19be1 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -288,7 +288,7 @@ struct dense_assignment_loop; template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel) { for(Index outer = 0; outer < kernel.outerSize(); ++outer) { for(Index inner = 0; inner < kernel.innerSize(); ++inner) { @@ -414,7 +414,7 @@ template struct dense_assignment_loop { typedef typename Kernel::PacketType PacketType; - EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel 
&kernel) { const Index innerSize = kernel.innerSize(); const Index outerSize = kernel.outerSize(); @@ -455,7 +455,7 @@ struct dense_assignment_loop template struct dense_assignment_loop { - EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { const Index size = kernel.size(); for(Index i = 0; i < size; ++i) @@ -569,19 +569,19 @@ public: EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } /// Assign src(row,col) to dst(row,col) through the assignment functor. - EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) { m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); } /// \sa assignCoeff(Index,Index) - EIGEN_DEVICE_FUNC void assignCoeff(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) { m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); } /// \sa assignCoeff(Index,Index) - EIGEN_DEVICE_FUNC void assignCoeffByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); @@ -590,26 +590,26 @@ public: template - EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) { m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); } template - EIGEN_DEVICE_FUNC void assignPacket(Index index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) { m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); } template - EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) { Index row = 
rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); assignPacket(row, col); } - EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index rowIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 @@ -618,7 +618,7 @@ public: : inner; } - EIGEN_DEVICE_FUNC static Index colIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index colIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 6e1be1227..529870300 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -463,9 +463,8 @@ struct product_evaluator, ProductTag, DenseShape, && (InnerSize % packet_traits::size == 0) }; - EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const { - // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } @@ -477,7 +476,6 @@ struct product_evaluator, ProductTag, DenseShape, { const Index row = RowsAtCompileTime == 1 ? 0 : index; const Index col = RowsAtCompileTime == 1 ? 
index : 0; - // TODO check performance regression wrt to Eigen 3.2 which has special handling of this function return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); } From f047ecc36a4e940417c1980d16e1b029539f8f10 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 7 Oct 2015 15:48:35 +0200 Subject: [PATCH 056/344] _mm_hadd_epi32 is for SSSE3 only (and not SSE3) --- Eigen/src/Core/arch/SSE/PacketMath.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index e7b676f4c..2e0a807bf 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -532,10 +532,6 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) { return _mm_hadd_pd(vecs[0], vecs[1]); } -template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) -{ - return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3])); -} template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) { @@ -544,12 +540,6 @@ template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) } template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { return pfirst(_mm_hadd_pd(a, a)); } - -template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) -{ - Packet4i tmp0 = _mm_hadd_epi32(a,a); - return pfirst(_mm_hadd_epi32(tmp0,tmp0)); -} #else // SSE2 versions template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) @@ -580,7 +570,20 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) { return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1])); } +#endif // SSE3 + +#ifdef EIGEN_VECTORIZE_SSSE3 +template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) +{ + return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3])); +} +template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) +{ + Packet4i tmp0 = 
_mm_hadd_epi32(a,a); + return pfirst(_mm_hadd_epi32(tmp0,tmp0)); +} +#else template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) { Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a)); @@ -600,8 +603,7 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) tmp0 = _mm_unpackhi_epi64(tmp0, tmp1); return _mm_add_epi32(tmp0, tmp2); } -#endif // SSE3 - +#endif // Other reduction functions: // mul From c6eb17cbe92d4ed29e918edb7b7a1decddcdd8b3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 7 Oct 2015 15:50:42 +0200 Subject: [PATCH 057/344] Add helper routines to help bypassing some compiler otpimization when benchmarking --- bench/BenchTimer.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h index 28e2bcaea..64666d75f 100644 --- a/bench/BenchTimer.h +++ b/bench/BenchTimer.h @@ -28,6 +28,14 @@ # include #endif +static void escape(void *p) { + asm volatile("" : : "g"(p) : "memory"); +} + +static void clobber() { + asm volatile("" : : : "memory"); +} + #include namespace Eigen @@ -168,6 +176,7 @@ public: CODE; \ } \ TIMER.stop(); \ + clobber(); \ } \ } From 247259f805f914f4c0f9cae44771f68da2b4eb40 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 7 Oct 2015 15:51:06 +0200 Subject: [PATCH 058/344] Add a perfromance regression benchmark for lazyProduct --- .../perf_monitoring/lazy_gemm/changesets.txt | 45 ++++++ bench/perf_monitoring/lazy_gemm/lazy_gemm.cpp | 97 +++++++++++ bench/perf_monitoring/lazy_gemm/make_plot.sh | 37 +++++ .../lazy_gemm/run_lazy_gemm.sh | 152 ++++++++++++++++++ bench/perf_monitoring/lazy_gemm/settings.txt | 15 ++ 5 files changed, 346 insertions(+) create mode 100644 bench/perf_monitoring/lazy_gemm/changesets.txt create mode 100644 bench/perf_monitoring/lazy_gemm/lazy_gemm.cpp create mode 100755 bench/perf_monitoring/lazy_gemm/make_plot.sh create mode 100755 bench/perf_monitoring/lazy_gemm/run_lazy_gemm.sh create mode 100644 
bench/perf_monitoring/lazy_gemm/settings.txt diff --git a/bench/perf_monitoring/lazy_gemm/changesets.txt b/bench/perf_monitoring/lazy_gemm/changesets.txt new file mode 100644 index 000000000..40a71c781 --- /dev/null +++ b/bench/perf_monitoring/lazy_gemm/changesets.txt @@ -0,0 +1,45 @@ +#3.0.1 +#3.1.1 +#3.2.0 +3.2.4 +#5745:37f59e65eb6c +5891:d8652709345d # introduce AVX +#5893:24b4dc92c6d3 # merge +5895:997c2ef9fc8b # introduce FMA +#5904:e1eafd14eaa1 # complex and AVX +5908:f8ee3c721251 # improve packing with ptranspose +#5921:ca808bb456b0 # merge +#5927:8b1001f9e3ac +5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks +#5949:f3488f4e45b2 # merge +#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec +#5992:4a429f5e0483 # merge +before-evaluators +#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products +#6639:c9121c60b5c7 +#6655:06f163b5221f # Properly detect FMA support on ARM +#6677:700e023044e7 # FMA has been wrongly disabled +#6681:11d31dafb0e3 +#6699:5e6e8e10aad1 # merge default to tensors +#6726:ff2d2388e7b9 # merge default to tensors +#6742:0cbd6195e829 # merge default to tensors +#6747:853d2bafeb8f # Generalized the gebp apis +6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation +#6781:9cc5a931b2c6 # generalized gemv +#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product +#6844:039efd86b75c # merge tensor +6845:7333ed40c6ef # change prefetching in gebp +#6856:b5be5e10eb7f # merge index conversion +#6893:c3a64aba7c70 # clean blocking size computation +#6898:6fb31ebe6492 # rotating kernel for ARM +6899:877facace746 # rotating kernel for ARM only +#6904:c250623ae9fa # result_of +6921:915f1b1fc158 # fix prefetching change for ARM +6923:9ff25f6dacc6 # prefetching +6933:52572e60b5d3 # blocking size strategy +6937:c8c042f286b2 # avoid redundant pack_rhs 
+6981:7e5d6f78da59 # dynamic loop swapping +6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache +6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1. +7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) + diff --git a/bench/perf_monitoring/lazy_gemm/lazy_gemm.cpp b/bench/perf_monitoring/lazy_gemm/lazy_gemm.cpp new file mode 100644 index 000000000..d8ccdb5ac --- /dev/null +++ b/bench/perf_monitoring/lazy_gemm/lazy_gemm.cpp @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +template +inline void lazy_gemm(const MatA &A, const MatB &B, MatC &C) +{ + escape((void*)A.data()); + escape((void*)B.data()); + C.noalias() += A.lazyProduct(B); + escape((void*)C.data()); +} + +template +EIGEN_DONT_INLINE +double bench() +{ + typedef Matrix MatA; + typedef Matrix MatB; + typedef Matrix MatC; + + MatA A(m,k); + MatB B(k,n); + MatC C(m,n); + A.setRandom(); + B.setRandom(); + C.setZero(); + + BenchTimer t; + + double up = 1e7*4/sizeof(Scalar); + double tm0 = 10, tm1 = 20; + + double flops = 2. 
* m * n * k; + long rep = std::max(10., std::min(10000., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, lazy_gemm(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +template +double bench_t(int t) +{ + if(t) + return bench(); + else + return bench(); +} + +EIGEN_DONT_INLINE +double bench_mnk(int m, int n, int k, int t) +{ + int id = m*10000 + n*100 + k; + switch(id) { + case 10101 : return bench_t< 1, 1, 1>(t); break; + case 20202 : return bench_t< 2, 2, 2>(t); break; + case 30303 : return bench_t< 3, 3, 3>(t); break; + case 40404 : return bench_t< 4, 4, 4>(t); break; + case 50505 : return bench_t< 5, 5, 5>(t); break; + case 60606 : return bench_t< 6, 6, 6>(t); break; + case 70707 : return bench_t< 7, 7, 7>(t); break; + case 80808 : return bench_t< 8, 8, 8>(t); break; + case 90909 : return bench_t< 9, 9, 9>(t); break; + case 101010 : return bench_t<10,10,10>(t); break; + case 111111 : return bench_t<11,11,11>(t); break; + case 121212 : return bench_t<12,12,12>(t); break; + } + return 0; +} + +int main(int argc, char **argv) +{ + std::vector results; + + std::ifstream settings("settings.txt"); + long m, n, k, t; + while(settings >> m >> n >> k >> t) + { + //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; + results.push_back( bench_mnk(m, n, k, t) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/bench/perf_monitoring/lazy_gemm/make_plot.sh b/bench/perf_monitoring/lazy_gemm/make_plot.sh new file mode 100755 index 000000000..609c471f9 --- /dev/null +++ b/bench/perf_monitoring/lazy_gemm/make_plot.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# base name of the bench +# it reads $1.out +# and generates $1.pdf +WHAT=$1 + +header="rev " +while read line +do + if [ ! 
-z '$line' ]; then + header="$header \"$line\"" + fi +done < settings.txt + +echo $header > $WHAT.out.header +cat $WHAT.out >> $WHAT.out.header + + +echo "set title '$WHAT'" > $WHAT.gnuplot +echo "set key autotitle columnhead outside " >> $WHAT.gnuplot +echo "set xtics rotate 1" >> $WHAT.gnuplot + +echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot +echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot + +col=`cat settings.txt | wc -l` +echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot +echo " " >> $WHAT.gnuplot + +gnuplot -persist < $WHAT.gnuplot + +# generate a png file +# convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 $WHAT.ps -background white -flatten .$WHAT.png + +# clean +rm $WHAT.out.header $WHAT.gnuplot \ No newline at end of file diff --git a/bench/perf_monitoring/lazy_gemm/run_lazy_gemm.sh b/bench/perf_monitoring/lazy_gemm/run_lazy_gemm.sh new file mode 100755 index 000000000..76640f66d --- /dev/null +++ b/bench/perf_monitoring/lazy_gemm/run_lazy_gemm.sh @@ -0,0 +1,152 @@ +#!/bin/bash + +# Examples of environment variables to be set: +# PREFIX="haswell-fma-" +# CXX_FLAGS="-mfma" + +# Options: +# -up : enforce the recomputation of existing data, and keep best results as a merging strategy +# -s : recompute selected changesets only and keep bests + + +if echo "$*" | grep '\-up' > /dev/null; then + update=true +else + update=false +fi + +if echo "$*" | grep '\-s' > /dev/null; then + selected=true +else + selected=false +fi + +global_args="$*" + +if [ $selected == true ]; then + echo "Recompute selected changesets only and keep bests" +elif [ $update == true ]; then + echo "(Re-)Compute all changesets and keep bests" +else + echo "Skip previously computed changesets" +fi + + + +if [ ! -d "eigen_src" ]; then + hg clone https://bitbucket.org/eigen/eigen eigen_src +else + cd eigen_src + hg pull -u + cd .. +fi + +if [ ! 
-z '$CXX' ]; then + CXX=g++ +fi + +function make_backup +{ + if [ -f "$1.out" ]; then + mv "$1.out" "$1.backup" + fi +} + +function merge +{ + count1=`echo $1 | wc -w` + count2=`echo $2 | wc -w` + + if [ $count1 == $count2 ]; then + a=( $1 ); b=( $2 ) + res="" + for (( i=0 ; i<$count1 ; i++ )); do + ai=${a[$i]}; bi=${b[$i]} + tmp=`echo "if ($ai > $bi) $ai else $bi " | bc -l` + res="$res $tmp" + done + echo $res + + else + echo $1 + fi +} + +function test_current +{ + rev=$1 + scalar=$2 + name=$3 + + prev="" + if [ -e "$name.backup" ]; then + prev=`grep $rev "$name.backup" | cut -c 14-` + fi + res=$prev + count_rev=`echo $prev | wc -w` + count_ref=`cat "settings.txt" | wc -l` + if echo "$global_args" | grep "$rev" > /dev/null; then + rev_found=true + else + rev_found=false + fi +# echo $update et $selected et $rev_found because $rev et "$global_args" +# echo $count_rev et $count_ref + if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then + if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src lazy_gemm.cpp -DSCALAR=$scalar -o $name; then + curr=`./$name` + if [ $count_rev == $count_ref ]; then + echo "merge previous $prev" + echo "with new $curr" + else + echo "got $curr" + fi + res=`merge "$curr" "$prev"` +# echo $res + echo "$rev $res" >> $name.out + else + echo "Compilation failed, skip rev $rev" + fi + else + echo "Skip existing results for $rev / $name" + echo "$rev $res" >> $name.out + fi +} + +make_backup $PREFIX"slazy_gemm" +make_backup $PREFIX"dlazy_gemm" +make_backup $PREFIX"clazy_gemm" + +cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev +do + if [ ! -z '$rev' ]; then + echo "Testing rev $rev" + cd eigen_src + hg up -C $rev > /dev/null + actual_rev=`hg identify | cut -f1 -d' '` + cd .. 
+ + test_current $actual_rev float $PREFIX"slazy_gemm" + test_current $actual_rev double $PREFIX"dlazy_gemm" + test_current $actual_rev "std::complex" $PREFIX"clazy_gemm" + fi + +done + +echo "Float:" +cat $PREFIX"slazy_gemm.out" +echo "" + +echo "Double:" +cat $PREFIX"dlazy_gemm.out" +echo "" + +echo "Complex:" +cat $PREFIX"clazy_gemm.out" +echo "" + +./make_plot.sh $PREFIX"slazy_gemm" +./make_plot.sh $PREFIX"dlazy_gemm" +./make_plot.sh $PREFIX"clazy_gemm" + + diff --git a/bench/perf_monitoring/lazy_gemm/settings.txt b/bench/perf_monitoring/lazy_gemm/settings.txt new file mode 100644 index 000000000..407d5d4fa --- /dev/null +++ b/bench/perf_monitoring/lazy_gemm/settings.txt @@ -0,0 +1,15 @@ +1 1 1 0 +2 2 2 0 +3 3 3 0 +4 4 4 0 +4 4 4 1 +5 5 5 0 +6 6 6 0 +7 7 7 0 +7 7 7 1 +8 8 8 0 +9 9 9 0 +10 10 10 0 +11 11 11 0 +12 12 12 0 +12 12 12 1 From 13294b5152578bf9eb2bfe67f20b8f14f9e755b4 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 7 Oct 2015 16:06:48 +0200 Subject: [PATCH 059/344] Unify gemm and lazy_gemm benchmarks --- bench/perf_monitoring/gemm/changesets.txt | 2 + bench/perf_monitoring/gemm/gemm.cpp | 2 +- .../gemm/{settings.txt => gemm_settings.txt} | 0 .../{lazy_gemm => gemm}/lazy_gemm.cpp | 2 +- .../lazy_gemm_settings.txt} | 0 bench/perf_monitoring/gemm/make_plot.sh | 3 +- .../gemm/{run_gemm.sh => run.sh} | 32 ++-- .../perf_monitoring/lazy_gemm/changesets.txt | 45 ------ bench/perf_monitoring/lazy_gemm/make_plot.sh | 37 ----- .../lazy_gemm/run_lazy_gemm.sh | 152 ------------------ 10 files changed, 24 insertions(+), 251 deletions(-) rename bench/perf_monitoring/gemm/{settings.txt => gemm_settings.txt} (100%) rename bench/perf_monitoring/{lazy_gemm => gemm}/lazy_gemm.cpp (97%) rename bench/perf_monitoring/{lazy_gemm/settings.txt => gemm/lazy_gemm_settings.txt} (100%) rename bench/perf_monitoring/gemm/{run_gemm.sh => run.sh} (78%) delete mode 100644 bench/perf_monitoring/lazy_gemm/changesets.txt delete mode 100755 
bench/perf_monitoring/lazy_gemm/make_plot.sh delete mode 100755 bench/perf_monitoring/lazy_gemm/run_lazy_gemm.sh diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index 40a71c781..fb3e48e99 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -42,4 +42,6 @@ before-evaluators 6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache 6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1. 7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) +7591:09a8e2186610 # 3.3-alpha1 +7650:b0f3c8f43025 # help clang inlining diff --git a/bench/perf_monitoring/gemm/gemm.cpp b/bench/perf_monitoring/gemm/gemm.cpp index 72eb9cab6..614bd4737 100644 --- a/bench/perf_monitoring/gemm/gemm.cpp +++ b/bench/perf_monitoring/gemm/gemm.cpp @@ -53,7 +53,7 @@ int main(int argc, char **argv) { std::vector results; - std::ifstream settings("settings.txt"); + std::ifstream settings("gemm_settings.txt"); long m, n, k; while(settings >> m >> n >> k) { diff --git a/bench/perf_monitoring/gemm/settings.txt b/bench/perf_monitoring/gemm/gemm_settings.txt similarity index 100% rename from bench/perf_monitoring/gemm/settings.txt rename to bench/perf_monitoring/gemm/gemm_settings.txt diff --git a/bench/perf_monitoring/lazy_gemm/lazy_gemm.cpp b/bench/perf_monitoring/gemm/lazy_gemm.cpp similarity index 97% rename from bench/perf_monitoring/lazy_gemm/lazy_gemm.cpp rename to bench/perf_monitoring/gemm/lazy_gemm.cpp index d8ccdb5ac..b443218d7 100644 --- a/bench/perf_monitoring/lazy_gemm/lazy_gemm.cpp +++ b/bench/perf_monitoring/gemm/lazy_gemm.cpp @@ -83,7 +83,7 @@ int main(int argc, char **argv) { std::vector results; - std::ifstream settings("settings.txt"); + std::ifstream settings("lazy_gemm_settings.txt"); long m, n, k, t; while(settings >> m >> n >> k >> t) { 
diff --git a/bench/perf_monitoring/lazy_gemm/settings.txt b/bench/perf_monitoring/gemm/lazy_gemm_settings.txt similarity index 100% rename from bench/perf_monitoring/lazy_gemm/settings.txt rename to bench/perf_monitoring/gemm/lazy_gemm_settings.txt diff --git a/bench/perf_monitoring/gemm/make_plot.sh b/bench/perf_monitoring/gemm/make_plot.sh index 609c471f9..4d6053501 100755 --- a/bench/perf_monitoring/gemm/make_plot.sh +++ b/bench/perf_monitoring/gemm/make_plot.sh @@ -4,6 +4,7 @@ # it reads $1.out # and generates $1.pdf WHAT=$1 +bench=$2 header="rev " while read line @@ -11,7 +12,7 @@ do if [ ! -z '$line' ]; then header="$header \"$line\"" fi -done < settings.txt +done < $bench"_settings.txt" echo $header > $WHAT.out.header cat $WHAT.out >> $WHAT.out.header diff --git a/bench/perf_monitoring/gemm/run_gemm.sh b/bench/perf_monitoring/gemm/run.sh similarity index 78% rename from bench/perf_monitoring/gemm/run_gemm.sh rename to bench/perf_monitoring/gemm/run.sh index 3fa6a3661..bfb4ecfac 100755 --- a/bench/perf_monitoring/gemm/run_gemm.sh +++ b/bench/perf_monitoring/gemm/run.sh @@ -1,5 +1,8 @@ #!/bin/bash +# ./run.sh gemm +# ./run.sh lazy_gemm + # Examples of environment variables to be set: # PREFIX="haswell-fma-" # CXX_FLAGS="-mfma" @@ -8,6 +11,7 @@ # -up : enforce the recomputation of existing data, and keep best results as a merging strategy # -s : recompute selected changesets only and keep bests +bench=$1 if echo "$*" | grep '\-up' > /dev/null; then update=true @@ -84,7 +88,7 @@ function test_current fi res=$prev count_rev=`echo $prev | wc -w` - count_ref=`cat "settings.txt" | wc -l` + count_ref=`cat $bench"_settings.txt" | wc -l` if echo "$global_args" | grep "$rev" > /dev/null; then rev_found=true else @@ -93,7 +97,7 @@ function test_current # echo $update et $selected et $rev_found because $rev et "$global_args" # echo $count_rev et $count_ref if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then - 
if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src gemm.cpp -DSCALAR=$scalar -o $name; then + if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then curr=`./$name` if [ $count_rev == $count_ref ]; then echo "merge previous $prev" @@ -113,9 +117,9 @@ function test_current fi } -make_backup $PREFIX"sgemm" -make_backup $PREFIX"dgemm" -make_backup $PREFIX"cgemm" +make_backup $PREFIX"s"$bench +make_backup $PREFIX"d"$bench +make_backup $PREFIX"c"$bench cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev do @@ -126,27 +130,27 @@ do actual_rev=`hg identify | cut -f1 -d' '` cd .. - test_current $actual_rev float $PREFIX"sgemm" - test_current $actual_rev double $PREFIX"dgemm" - test_current $actual_rev "std::complex" $PREFIX"cgemm" + test_current $actual_rev float $PREFIX"s"$bench + test_current $actual_rev double $PREFIX"d"$bench + test_current $actual_rev "std::complex" $PREFIX"c"$bench fi done echo "Float:" -cat $PREFIX"sgemm.out" +cat $PREFIX"s"$bench.out" echo "" echo "Double:" -cat $PREFIX"dgemm.out" +cat $PREFIX"d"$bench.out" echo "" echo "Complex:" -cat $PREFIX"cgemm.out" +cat $PREFIX"c"$bench.out" echo "" -./make_plot.sh $PREFIX"sgemm" -./make_plot.sh $PREFIX"dgemm" -./make_plot.sh $PREFIX"cgemm" +./make_plot.sh $PREFIX"s"$bench $bench +./make_plot.sh $PREFIX"d"$bench $bench +./make_plot.sh $PREFIX"c"$bench $bench diff --git a/bench/perf_monitoring/lazy_gemm/changesets.txt b/bench/perf_monitoring/lazy_gemm/changesets.txt deleted file mode 100644 index 40a71c781..000000000 --- a/bench/perf_monitoring/lazy_gemm/changesets.txt +++ /dev/null @@ -1,45 +0,0 @@ -#3.0.1 -#3.1.1 -#3.2.0 -3.2.4 -#5745:37f59e65eb6c -5891:d8652709345d # introduce AVX -#5893:24b4dc92c6d3 # merge -5895:997c2ef9fc8b # introduce FMA -#5904:e1eafd14eaa1 # complex and AVX -5908:f8ee3c721251 # improve packing with ptranspose -#5921:ca808bb456b0 # merge -#5927:8b1001f9e3ac -5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 
packets x 4 register-level blocks -#5949:f3488f4e45b2 # merge -#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec -#5992:4a429f5e0483 # merge -before-evaluators -#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products -#6639:c9121c60b5c7 -#6655:06f163b5221f # Properly detect FMA support on ARM -#6677:700e023044e7 # FMA has been wrongly disabled -#6681:11d31dafb0e3 -#6699:5e6e8e10aad1 # merge default to tensors -#6726:ff2d2388e7b9 # merge default to tensors -#6742:0cbd6195e829 # merge default to tensors -#6747:853d2bafeb8f # Generalized the gebp apis -6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation -#6781:9cc5a931b2c6 # generalized gemv -#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product -#6844:039efd86b75c # merge tensor -6845:7333ed40c6ef # change prefetching in gebp -#6856:b5be5e10eb7f # merge index conversion -#6893:c3a64aba7c70 # clean blocking size computation -#6898:6fb31ebe6492 # rotating kernel for ARM -6899:877facace746 # rotating kernel for ARM only -#6904:c250623ae9fa # result_of -6921:915f1b1fc158 # fix prefetching change for ARM -6923:9ff25f6dacc6 # prefetching -6933:52572e60b5d3 # blocking size strategy -6937:c8c042f286b2 # avoid redundant pack_rhs -6981:7e5d6f78da59 # dynamic loop swapping -6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache -6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1. 
-7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) - diff --git a/bench/perf_monitoring/lazy_gemm/make_plot.sh b/bench/perf_monitoring/lazy_gemm/make_plot.sh deleted file mode 100755 index 609c471f9..000000000 --- a/bench/perf_monitoring/lazy_gemm/make_plot.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -# base name of the bench -# it reads $1.out -# and generates $1.pdf -WHAT=$1 - -header="rev " -while read line -do - if [ ! -z '$line' ]; then - header="$header \"$line\"" - fi -done < settings.txt - -echo $header > $WHAT.out.header -cat $WHAT.out >> $WHAT.out.header - - -echo "set title '$WHAT'" > $WHAT.gnuplot -echo "set key autotitle columnhead outside " >> $WHAT.gnuplot -echo "set xtics rotate 1" >> $WHAT.gnuplot - -echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot -echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot - -col=`cat settings.txt | wc -l` -echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot -echo " " >> $WHAT.gnuplot - -gnuplot -persist < $WHAT.gnuplot - -# generate a png file -# convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 $WHAT.ps -background white -flatten .$WHAT.png - -# clean -rm $WHAT.out.header $WHAT.gnuplot \ No newline at end of file diff --git a/bench/perf_monitoring/lazy_gemm/run_lazy_gemm.sh b/bench/perf_monitoring/lazy_gemm/run_lazy_gemm.sh deleted file mode 100755 index 76640f66d..000000000 --- a/bench/perf_monitoring/lazy_gemm/run_lazy_gemm.sh +++ /dev/null @@ -1,152 +0,0 @@ -#!/bin/bash - -# Examples of environment variables to be set: -# PREFIX="haswell-fma-" -# CXX_FLAGS="-mfma" - -# Options: -# -up : enforce the recomputation of existing data, and keep best results as a merging strategy -# -s : recompute selected changesets only and keep bests - - -if echo "$*" | grep '\-up' > /dev/null; then - update=true -else - 
update=false -fi - -if echo "$*" | grep '\-s' > /dev/null; then - selected=true -else - selected=false -fi - -global_args="$*" - -if [ $selected == true ]; then - echo "Recompute selected changesets only and keep bests" -elif [ $update == true ]; then - echo "(Re-)Compute all changesets and keep bests" -else - echo "Skip previously computed changesets" -fi - - - -if [ ! -d "eigen_src" ]; then - hg clone https://bitbucket.org/eigen/eigen eigen_src -else - cd eigen_src - hg pull -u - cd .. -fi - -if [ ! -z '$CXX' ]; then - CXX=g++ -fi - -function make_backup -{ - if [ -f "$1.out" ]; then - mv "$1.out" "$1.backup" - fi -} - -function merge -{ - count1=`echo $1 | wc -w` - count2=`echo $2 | wc -w` - - if [ $count1 == $count2 ]; then - a=( $1 ); b=( $2 ) - res="" - for (( i=0 ; i<$count1 ; i++ )); do - ai=${a[$i]}; bi=${b[$i]} - tmp=`echo "if ($ai > $bi) $ai else $bi " | bc -l` - res="$res $tmp" - done - echo $res - - else - echo $1 - fi -} - -function test_current -{ - rev=$1 - scalar=$2 - name=$3 - - prev="" - if [ -e "$name.backup" ]; then - prev=`grep $rev "$name.backup" | cut -c 14-` - fi - res=$prev - count_rev=`echo $prev | wc -w` - count_ref=`cat "settings.txt" | wc -l` - if echo "$global_args" | grep "$rev" > /dev/null; then - rev_found=true - else - rev_found=false - fi -# echo $update et $selected et $rev_found because $rev et "$global_args" -# echo $count_rev et $count_ref - if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then - if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src lazy_gemm.cpp -DSCALAR=$scalar -o $name; then - curr=`./$name` - if [ $count_rev == $count_ref ]; then - echo "merge previous $prev" - echo "with new $curr" - else - echo "got $curr" - fi - res=`merge "$curr" "$prev"` -# echo $res - echo "$rev $res" >> $name.out - else - echo "Compilation failed, skip rev $rev" - fi - else - echo "Skip existing results for $rev / $name" - echo "$rev $res" >> $name.out - fi -} - 
-make_backup $PREFIX"slazy_gemm" -make_backup $PREFIX"dlazy_gemm" -make_backup $PREFIX"clazy_gemm" - -cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev -do - if [ ! -z '$rev' ]; then - echo "Testing rev $rev" - cd eigen_src - hg up -C $rev > /dev/null - actual_rev=`hg identify | cut -f1 -d' '` - cd .. - - test_current $actual_rev float $PREFIX"slazy_gemm" - test_current $actual_rev double $PREFIX"dlazy_gemm" - test_current $actual_rev "std::complex" $PREFIX"clazy_gemm" - fi - -done - -echo "Float:" -cat $PREFIX"slazy_gemm.out" -echo "" - -echo "Double:" -cat $PREFIX"dlazy_gemm.out" -echo "" - -echo "Complex:" -cat $PREFIX"clazy_gemm.out" -echo "" - -./make_plot.sh $PREFIX"slazy_gemm" -./make_plot.sh $PREFIX"dlazy_gemm" -./make_plot.sh $PREFIX"clazy_gemm" - - From 131db3c552304e1fa2c9438ec71a99ef32eea54e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 7 Oct 2015 16:37:46 +0200 Subject: [PATCH 060/344] Fix return by value versus ref typo in IncompleteCholesky --- unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h index 388e6bfaa..29617f2ca 100644 --- a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h +++ b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h @@ -125,7 +125,7 @@ class IncompleteCholesky : public SparseSolverBase Date: Thu, 8 Oct 2015 10:50:39 +0200 Subject: [PATCH 061/344] Doc: add link to doc of sparse solver concept --- Eigen/src/CholmodSupport/CholmodSupport.h | 8 ++++ .../BasicPreconditioners.h | 6 +++ Eigen/src/IterativeLinearSolvers/BiCGSTAB.h | 2 + .../ConjugateGradient.h | 2 + .../IterativeLinearSolvers/IncompleteLUT.h | 2 + .../LeastSquareConjugateGradient.h | 2 + Eigen/src/PaStiXSupport/PaStiXSupport.h | 12 +++-- Eigen/src/PardisoSupport/PardisoSupport.h | 6 +++ 
Eigen/src/SPQRSupport/SuiteSparseQRSupport.h | 44 ++++++++++--------- Eigen/src/SparseCholesky/SimplicialCholesky.h | 4 ++ Eigen/src/SparseLU/SparseLU.h | 3 +- Eigen/src/SparseQR/SparseQR.h | 2 + Eigen/src/SuperLUSupport/SuperLUSupport.h | 4 ++ doc/Doxyfile.in | 3 +- doc/SparseLinearSystems.dox | 4 +- 15 files changed, 77 insertions(+), 27 deletions(-) diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index d2b0fb282..3ff0c6fc9 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -350,6 +350,8 @@ class CholmodBase : public SparseSolverBase * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT @@ -397,6 +399,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT @@ -442,6 +446,8 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. 
* * \sa \ref TutorialSparseDirectSolvers @@ -489,6 +495,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * * \sa \ref TutorialSparseDirectSolvers diff --git a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index ff7f08c1c..b850630a3 100644 --- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -23,6 +23,8 @@ namespace Eigen { * * \tparam _Scalar the type of the scalar. * + * \implsparsesolverconcept + * * This preconditioner is suitable for both selfadjoint and general problems. * The diagonal entries are pre-inverted and stored into a dense vector. * @@ -114,6 +116,8 @@ class DiagonalPreconditioner * * \tparam _Scalar the type of the scalar. * + * \implsparsesolverconcept + * * The diagonal entries are pre-inverted and stored into a dense vector. * * \sa class LeastSquaresConjugateGradient, class DiagonalPreconditioner @@ -172,6 +176,8 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> /** \ingroup IterativeLinearSolvers_Module * \brief A naive preconditioner which approximates any matrix as the identity matrix * + * \implsparsesolverconcept + * * \sa class DiagonalPreconditioner */ class IdentityPreconditioner diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index a34ee7628..76e86a94a 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -132,6 +132,8 @@ struct traits > * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. 
* \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner * + * \implsparsesolverconcept + * * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations * and NumTraits::epsilon() for the tolerance. diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h index 8f33c446d..59092dc18 100644 --- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h @@ -118,6 +118,8 @@ struct traits > * Default is \c Lower, best performance is \c Lower|Upper. * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner * + * \implsparsesolverconcept + * * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations * and NumTraits::epsilon() for the tolerance. diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index b644163f1..10b9fcc18 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h @@ -67,6 +67,8 @@ Index QuickSplit(VectorV &row, VectorI &ind, Index ncut) * \class IncompleteLUT * \brief Incomplete LU factorization with dual-threshold strategy * + * \implsparsesolverconcept + * * During the numerical factorization, two dropping rules are used : * 1) any element whose magnitude is less than some tolerance is dropped. 
* This tolerance is obtained by multiplying the input tolerance @p droptol diff --git a/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h index 1d819927e..b578b2a7f 100644 --- a/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h @@ -119,6 +119,8 @@ struct traits > * \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix. * \tparam _Preconditioner the type of the preconditioner. Default is LeastSquareDiagonalPreconditioner * + * \implsparsesolverconcept + * * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations * and NumTraits::epsilon() for the tolerance. diff --git a/Eigen/src/PaStiXSupport/PaStiXSupport.h b/Eigen/src/PaStiXSupport/PaStiXSupport.h index 4e73edf5b..cec4149e7 100644 --- a/Eigen/src/PaStiXSupport/PaStiXSupport.h +++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h @@ -398,7 +398,9 @@ bool PastixBase::_solve_impl(const MatrixBase &b, MatrixBase &x * NOTE : Note that if the analysis and factorization phase are called separately, * the input matrix will be symmetrized at each call, hence it is advised to * symmetrize the matrix in a end-user program and set \p IsStrSym to true - * + * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers * */ @@ -509,7 +511,9 @@ class PastixLU : public PastixBase< PastixLU<_MatrixType> > * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam UpLo The part of the matrix to use : Lower or Upper. 
The default is Lower as required by PaStiX - * + * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template @@ -590,7 +594,9 @@ class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> > * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam UpLo The part of the matrix to use : Lower or Upper. The default is Lower as required by PaStiX - * + * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h index 234e3213b..9c18eb9b9 100755 --- a/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/Eigen/src/PardisoSupport/PardisoSupport.h @@ -371,6 +371,8 @@ void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template @@ -421,6 +423,8 @@ class PardisoLU : public PardisoImpl< PardisoLU > * \tparam UpLo can be any bitwise combination of Upper, Lower. The default is Upper, meaning only the upper triangular part has to be used. * Upper|Lower can be used to tell both triangular parts can be used as input. * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template @@ -479,6 +483,8 @@ class PardisoLLT : public PardisoImpl< PardisoLLT > * Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix. * Upper|Lower can be used to tell both triangular parts can be used as input. 
* + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template diff --git a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h index 4ad22f8b4..ac2de9b04 100644 --- a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +++ b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h @@ -33,27 +33,29 @@ namespace Eigen { } // End namespace internal /** - * \ingroup SPQRSupport_Module - * \class SPQR - * \brief Sparse QR factorization based on SuiteSparseQR library - * - * This class is used to perform a multithreaded and multifrontal rank-revealing QR decomposition - * of sparse matrices. The result is then used to solve linear leasts_square systems. - * Clearly, a QR factorization is returned such that A*P = Q*R where : - * - * P is the column permutation. Use colsPermutation() to get it. - * - * Q is the orthogonal matrix represented as Householder reflectors. - * Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose. - * You can then apply it to a vector. - * - * R is the sparse triangular factor. Use matrixQR() to get it as SparseMatrix. - * NOTE : The Index type of R is always UF_long. You can get it with SPQR::Index - * - * \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<> - * NOTE - * - */ + * \ingroup SPQRSupport_Module + * \class SPQR + * \brief Sparse QR factorization based on SuiteSparseQR library + * + * This class is used to perform a multithreaded and multifrontal rank-revealing QR decomposition + * of sparse matrices. The result is then used to solve linear leasts_square systems. + * Clearly, a QR factorization is returned such that A*P = Q*R where : + * + * P is the column permutation. Use colsPermutation() to get it. + * + * Q is the orthogonal matrix represented as Householder reflectors. + * Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose. + * You can then apply it to a vector. 
+ * + * R is the sparse triangular factor. Use matrixQR() to get it as SparseMatrix. + * NOTE : The Index type of R is always UF_long. You can get it with SPQR::Index + * + * \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<> + * + * \implsparsesolverconcept + * + * + */ template class SPQR : public SparseSolverBase > { diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h index f56298e8c..ef612cf45 100644 --- a/Eigen/src/SparseCholesky/SimplicialCholesky.h +++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h @@ -319,6 +319,8 @@ template struct traits or NaturalOrdering<>. Default is AMDOrdering<> * + * \implsparsesolverconcept + * * \sa class SimplicialLDLT, class AMDOrdering, class NaturalOrdering */ template @@ -408,6 +410,8 @@ public: * or Upper. Default is Lower. * \tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> * + * \implsparsesolverconcept + * * \sa class SimplicialLLT, class AMDOrdering, class NaturalOrdering */ template diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 8cdd29c7b..73368cba4 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -64,7 +64,8 @@ template struct SparseLUMatrixURetu * * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<> * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. Default is COLMAD - * + * + * \implsparsesolverconcept * * \sa \ref TutorialSparseDirectSolvers * \sa \ref OrderingMethods_Module diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index 548b3f9b0..bbd337c40 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -62,6 +62,8 @@ namespace internal { * \tparam _OrderingType The fill-reducing ordering method. 
See the \link OrderingMethods_Module * OrderingMethods \endlink module for the list of built-in and external ordering methods. * + * \implsparsesolverconcept + * * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()). * */ diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index d067d8fdf..7c644eef6 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -449,6 +449,8 @@ class SuperLUBase : public SparseSolverBase * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers */ template @@ -800,6 +802,8 @@ typename SuperLU::Scalar SuperLU::determinant() const * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * + * \implsparsesolverconcept + * * \sa \ref TutorialSparseDirectSolvers, class ConjugateGradient, class BiCGSTAB */ diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 800bb30ee..e15ba84bd 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -223,7 +223,8 @@ ALIASES = "only_for_vectors=This is only for vectors (either row- "note_about_using_kernel_to_study_multiple_solutions=If you need a complete analysis of the space of solutions, take the one solution obtained by this method and add to it elements of the kernel, as determined by kernel()." \ "note_about_checking_solutions=This method just tries to find as good a solution as possible. If you want to check whether a solution exists or if it is accurate, just call this function to get a result and then compute the error of this result, or use MatrixBase::isApprox() directly, for instance like this: \code bool a_solution_exists = (A*result).isApprox(b, precision); \endcode This method avoids dividing by zero, so that the non-existence of a solution doesn't by itself mean that you'll get \c inf or \c nan values." 
\ "note_try_to_help_rvo=This function returns the result by value. In order to make that efficient, it is implemented as just a return statement using a special constructor, hopefully allowing the compiler to perform a RVO (return value optimization)." \ - "nonstableyet=\warning This is not considered to be part of the stable public API yet. Changes may happen in future releases. See \ref Experimental \"Experimental parts of Eigen\"" + "nonstableyet=\warning This is not considered to be part of the stable public API yet. Changes may happen in future releases. See \ref Experimental \"Experimental parts of Eigen\"" \ + "implsparsesolverconcept=This class follows the \link TutorialSparseSolverConcept sparse solver concept \endlink." ALIASES += "eigenAutoToc= " diff --git a/doc/SparseLinearSystems.dox b/doc/SparseLinearSystems.dox index 48c18f46f..b7f5c600b 100644 --- a/doc/SparseLinearSystems.dox +++ b/doc/SparseLinearSystems.dox @@ -4,7 +4,7 @@ In Eigen, there are several methods available to solve linear systems when the c \eigenAutoToc -\section TutorialSparseDirectSolvers Sparse solvers +\section TutorialSparseSolverList List of sparse solvers %Eigen currently provides a limited set of built-in solvers, as well as wrappers to external solver libraries. They are summarized in the following table: @@ -53,6 +53,8 @@ They are summarized in the following table: Here \c SPD means symmetric positive definite. +\section TutorialSparseSolverConcept Sparse solver concept + All these solvers follow the same general concept. 
Here is a typical and general example: \code From 632e7705b1a9e8404ce59525bd55d7283fcbd36e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 10:54:36 +0200 Subject: [PATCH 062/344] Improve doc of IncompleteCholesky --- .../src/IterativeSolvers/IncompleteCholesky.h | 105 ++++++++++++------ 1 file changed, 69 insertions(+), 36 deletions(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h index 29617f2ca..2cad1cec6 100644 --- a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h +++ b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2012 Désiré Nuentsa-Wakam +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,22 +16,28 @@ namespace Eigen { /** - * \brief Modified Incomplete Cholesky with dual threshold - * - * References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with - * Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999 - * - * \tparam _MatrixType The type of the sparse matrix. It should be a symmetric - * matrix. It is advised to give a row-oriented sparse matrix - * \tparam _UpLo The triangular part of the matrix to reference. - * \tparam _OrderingType - * - * It performs the following incomplete factorization: \f$ S P A P' S \approx L L' \f$ - * where L is a lower triangular factor, S if a diagonal scaling matrix, and P is a - * fill-in reducing permutation as computed of the ordering method. - * - */ - + * \brief Modified Incomplete Cholesky with dual threshold + * + * References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with + * Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999 + * + * \tparam _MatrixType The type of the sparse matrix. 
It is advised to give a row-oriented sparse matrix + * \tparam _UpLo The triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * \tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> + * + * \implsparsesolverconcept + * + * It performs the following incomplete factorization: \f$ S P A P' S \approx L L' \f$ + * where L is a lower triangular factor, S is a diagonal scaling matrix, and P is a + * fill-in reducing permutation as computed by the ordering method. + * + * \b Shifting \b strategy: Let \f$ B = S P A P' S \f$ be the scaled matrix on which the factorization is carried out, + * and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$ then, the factorization is directly performed + * on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta| I \f$ where + * \f$ \sigma \f$ is the initial shift value as returned and set by setInitialShift() method. The default value is \f$ \sigma = 10^{-3} \f$. + * + */ template > class IncompleteCholesky : public SparseSolverBase > { @@ -50,38 +57,50 @@ class IncompleteCholesky : public SparseSolverBase > VectorList; enum { UpLo = _UpLo }; public: + + /** Default constructor leaving the object in a partly non-initialized stage. + * + * You must call compute() or the pair analyzePattern()/factorize() to make it valid. + * + * \sa IncompleteCholesky(const MatrixType&) + */ IncompleteCholesky() : m_initialShift(1e-3),m_factorizationIsOk(false) {} + /** Constructor computing the incomplete factorization for the given matrix \a matrix. 
+ */ template IncompleteCholesky(const MatrixType& matrix) : m_initialShift(1e-3),m_factorizationIsOk(false) { compute(matrix); } + /** \returns number of rows of the factored matrix */ Index rows() const { return m_L.rows(); } + /** \returns number of columns of the factored matrix */ Index cols() const { return m_L.cols(); } /** \brief Reports whether previous computation was successful. * - * \returns \c Success if computation was succesful, + * It triggers an assertion if \c *this has not been initialized through the respective constructor, + * or a call to compute() or analyzePattern(). + * + * \returns \c Success if computation was successful, * \c NumericalIssue if the matrix appears to be negative. */ ComputationInfo info() const { - eigen_assert(m_isInitialized && "IncompleteLLT is not initialized."); + eigen_assert(m_isInitialized && "IncompleteCholesky is not initialized."); return m_info; } - /** - * \brief Set the initial shift parameter - */ + /** \brief Set the initial shift parameter \f$ \sigma \f$. + */ void setInitialShift(RealScalar shift) { m_initialShift = shift; } - /** - * \brief Computes the fill reducing permutation vector. - */ + /** \brief Computes the fill reducing permutation vector using the sparsity pattern of \a mat + */ template void analyzePattern(const MatrixType& mat) { @@ -90,20 +109,36 @@ class IncompleteCholesky : public SparseSolverBase(), pinv); if(pinv.size()>0) m_perm = pinv.inverse(); else m_perm.resize(0); - m_analysisIsOk = true; + m_L.resize(mat.rows(), mat.cols()); + m_analysisIsOk = true; m_isInitialized = true; + m_info = Success; } + /** \brief Performs the numerical factorization of the input matrix \a mat + * + * The method analyzePattern() or compute() must have been called beforehand + * with a matrix having the same pattern. 
+ * + * \sa compute(), analyzePattern() + */ template - void factorize(const MatrixType& amat); + void factorize(const MatrixType& mat); + /** Computes or re-computes the incomplete Cholesky factorization of the input matrix \a mat + * + * It is a shortcut for a sequential call to the analyzePattern() and factorize() methods. + * + * \sa analyzePattern(), factorize() + */ template - void compute(const MatrixType& matrix) + void compute(const MatrixType& mat) { - analyzePattern(matrix); - factorize(matrix); + analyzePattern(mat); + factorize(mat); } + // internal template void _solve_impl(const Rhs& b, Dest& x) const { @@ -119,13 +154,13 @@ class IncompleteCholesky : public SparseSolverBase::factorize(const _MatrixType // Dropping strategy : Keep only the p largest elements per column, where p is the number of elements in the column of the original matrix. Other strategies will be added - m_L.resize(mat.rows(), mat.cols()); - // Apply the fill-reducing permutation computed in analyzePattern() if (m_perm.rows() == mat.rows() ) // To detect the null permutation { @@ -197,7 +230,7 @@ void IncompleteCholesky::factorize(const _MatrixType else m_scale(j) = 1; - // FIXME disable scaling if not needed, i.e., if it is roughtly uniform? (this will make solve() faster) + // FIXME disable scaling if not needed, i.e., if it is roughly uniform? 
(this will make solve() faster) // Scale and compute the shift for the matrix RealScalar mindiag = NumTraits::highest(); @@ -297,7 +330,7 @@ void IncompleteCholesky::factorize(const _MatrixType updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol); } m_factorizationIsOk = true; - m_info = Success; + m_info = Success; } template From 1b148d9e2e1fdd5ab39c22230ac93dfa52cfa973 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 11:32:46 +0200 Subject: [PATCH 063/344] Move IncompleteCholesky to official modules --- Eigen/IterativeLinearSolvers | 1 + .../src/IterativeLinearSolvers}/IncompleteCholesky.h | 0 test/CMakeLists.txt | 1 + {unsupported/test => test}/incomplete_cholesky.cpp | 2 +- unsupported/Eigen/IterativeSolvers | 1 - unsupported/test/CMakeLists.txt | 1 - 6 files changed, 3 insertions(+), 3 deletions(-) rename {unsupported/Eigen/src/IterativeSolvers => Eigen/src/IterativeLinearSolvers}/IncompleteCholesky.h (100%) rename {unsupported/test => test}/incomplete_cholesky.cpp (96%) diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers index f5fdcd9e5..027e537c6 100644 --- a/Eigen/IterativeLinearSolvers +++ b/Eigen/IterativeLinearSolvers @@ -34,6 +34,7 @@ #include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" #include "src/IterativeLinearSolvers/BiCGSTAB.h" #include "src/IterativeLinearSolvers/IncompleteLUT.h" +#include "src/IterativeLinearSolvers/IncompleteCholesky.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h similarity index 100% rename from unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h rename to Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 767e82f21..9684c90e8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -236,6 +236,7 @@ ei_add_test(sparse_solvers) ei_add_test(sparse_permutations) 
ei_add_test(simplicial_cholesky) ei_add_test(conjugate_gradient) +ei_add_test(incomplete_cholesky) ei_add_test(bicgstab) ei_add_test(lscg) ei_add_test(sparselu) diff --git a/unsupported/test/incomplete_cholesky.cpp b/test/incomplete_cholesky.cpp similarity index 96% rename from unsupported/test/incomplete_cholesky.cpp rename to test/incomplete_cholesky.cpp index cc2ed698e..435e2839a 100644 --- a/unsupported/test/incomplete_cholesky.cpp +++ b/test/incomplete_cholesky.cpp @@ -18,7 +18,7 @@ template void test_incomplete_cholesky_T() ConjugateGradient > > cg_illt_lower_amd; ConjugateGradient > > cg_illt_lower_nat; ConjugateGradient > > cg_illt_upper_amd; - ConjugateGradient > > cg_illt_upper_nat; + ConjugateGradient > > cg_illt_upper_nat; CALL_SUBTEST( check_sparse_spd_solving(cg_illt_lower_amd) ); diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers index ff0d59b6e..f0c017f00 100644 --- a/unsupported/Eigen/IterativeSolvers +++ b/unsupported/Eigen/IterativeSolvers @@ -33,7 +33,6 @@ #include "../../Eigen/Jacobi" #include "../../Eigen/Householder" #include "src/IterativeSolvers/GMRES.h" -#include "src/IterativeSolvers/IncompleteCholesky.h" //#include "src/IterativeSolvers/SSORPreconditioner.h" #include "src/IterativeSolvers/MINRES.h" diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 79e70ced4..7a1737edd 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -91,7 +91,6 @@ endif() ei_add_test(polynomialsolver) ei_add_test(polynomialutils) ei_add_test(splines) -ei_add_test(incomplete_cholesky) ei_add_test(gmres) ei_add_test(minres) ei_add_test(levenberg_marquardt) From 5d7ebfb275cbfe3536a19aa8466d19f16cb79862 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 11:33:17 +0200 Subject: [PATCH 064/344] Update sparse solver list to make it more complete --- doc/SparseLinearSystems.dox | 75 +++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 24 
deletions(-) diff --git a/doc/SparseLinearSystems.dox b/doc/SparseLinearSystems.dox index b7f5c600b..ba6a12035 100644 --- a/doc/SparseLinearSystems.dox +++ b/doc/SparseLinearSystems.dox @@ -6,34 +6,61 @@ In Eigen, there are several methods available to solve linear systems when the c \section TutorialSparseSolverList List of sparse solvers -%Eigen currently provides a limited set of built-in solvers, as well as wrappers to external solver libraries. -They are summarized in the following table: +%Eigen currently provides a wide set of built-in solvers, as well as wrappers to external solver libraries. +They are summarized in the following tables: + +\subsection TutorialSparseSolverList_Direct Built-in direct solvers + + + + + + + + + + + + + + + + + + + + + + +
ClassSolver kindMatrix kindFeatures related to performanceLicense

Notes

SimplicialLLT \n #includeDirect LLt factorizationSPDFill-in reducingLGPLSimplicialLDLT is often preferable
SimplicialLDLT \n #includeDirect LDLt factorizationSPDFill-in reducingLGPLRecommended for very sparse and not too large problems (e.g., 2D Poisson eq.)
SparseLU \n #include LU factorization Square Fill-in reducing, Leverage fast dense algebraMPL2optimized for small and large problems with irregular patterns
SparseQR \n #include QR factorizationAny, rectangular Fill-in reducingMPL2recommended for least-square problems, has a basic rank-revealing feature
+ +\subsection TutorialSparseSolverList_Iterative Built-in iterative solvers + + + + + + + + + + + + + + + + + + + +
ClassSolver kindMatrix kindSupported preconditioners, [default]License

Notes

ConjugateGradient \n #include Classic iterative CGSPDIdentityPreconditioner, [DiagonalPreconditioner], IncompleteCholeskyMPL2Recommended for large symmetric problems (e.g., 3D Poisson eq.)
LeastSquaresConjugateGradient \n #includeCG for rectangular least-square problemRectangularIdentityPreconditioner, [LeastSquareDiagonalPreconditioner]MPL2Solve for min |A'Ax-b|^2 without forming A'A
BiCGSTAB \n #includeIterative stabilized bi-conjugate gradientSquareIdentityPreconditioner, [DiagonalPreconditioner], IncompleteLUTMPL2To speedup the convergence, try it with the \ref IncompleteLUT preconditioner.
+ +\subsection TutorialSparseSolverList_Wrapper Wrappers to external solvers - - - - - - - - - - - - - - - - - - - - - - From e30bc89190c32c1154d09a4d4740022c52d5a87b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 15:20:50 +0200 Subject: [PATCH 065/344] Add missing include of std vector --- Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h index 2cad1cec6..1e2e9f9b9 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h @@ -10,8 +10,8 @@ #ifndef EIGEN_INCOMPLETE_CHOlESKY_H #define EIGEN_INCOMPLETE_CHOlESKY_H -#include "Eigen/src/IterativeLinearSolvers/IncompleteLUT.h" -#include + +#include #include namespace Eigen { From 5cc7251188110f2d24425eac3ce00d051d2b2c55 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 15:22:04 +0200 Subject: [PATCH 066/344] Some cleaning in evaluators --- Eigen/src/Core/AssignEvaluator.h | 4 ---- Eigen/src/Core/CoreEvaluators.h | 26 +++++++++++++------------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index a5ea19be1..121a722f2 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -311,7 +311,6 @@ struct dense_assignment_loop template struct dense_assignment_loop { - typedef typename Kernel::StorageIndex StorageIndex; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; @@ -392,7 +391,6 @@ struct dense_assignment_loop template struct dense_assignment_loop { - typedef typename Kernel::StorageIndex StorageIndex; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; @@ 
-438,7 +436,6 @@ struct dense_assignment_loop struct dense_assignment_loop { - typedef typename Kernel::StorageIndex StorageIndex; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { typedef typename Kernel::DstEvaluatorType::XprType DstXprType; @@ -545,7 +542,6 @@ public: typedef DstEvaluatorTypeT DstEvaluatorType; typedef SrcEvaluatorTypeT SrcEvaluatorType; typedef typename DstEvaluatorType::Scalar Scalar; - typedef typename DstEvaluatorType::StorageIndex StorageIndex; typedef copy_using_evaluator_traits AssignmentTraits; typedef typename AssignmentTraits::PacketType PacketType; diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 2cbb6cd44..214114ebe 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -98,9 +98,6 @@ struct evaluator template struct evaluator_base : public noncopyable { - // FIXME is it really usefull? - typedef typename traits::StorageIndex StorageIndex; - // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits ExpressionTraits; @@ -636,13 +633,16 @@ struct evaluator > HasNoStride = HasNoInnerStride && HasNoOuterStride, IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - PacketAlignment = unpacket_traits::alignment, - - KeepsPacketAccess = bool(HasNoInnerStride) - && ( bool(IsDynamicSize) - || HasNoOuterStride - || ( OuterStrideAtCompileTime!=Dynamic - && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ), + // FIXME I don't get the code below, in particular why outer-stride-at-compile-time should have any effect on PacketAccessBit... 
+ // Let's remove the code below for 3.4 if no issue occur +// PacketAlignment = unpacket_traits::alignment, +// KeepsPacketAccess = bool(HasNoInnerStride) +// && ( bool(IsDynamicSize) +// || HasNoOuterStride +// || ( OuterStrideAtCompileTime!=Dynamic +// && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ), + KeepsPacketAccess = bool(HasNoInnerStride), + Flags0 = evaluator::Flags, Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) ? int(Flags0) : int(Flags0 & ~LinearAccessBit), @@ -825,14 +825,14 @@ struct block_evaluator(block) { - // FIXME this should be an internal assertion + // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); } }; // -------------------- Select -------------------- -// TODO shall we introduce a ternary_evaluator? +// NOTE shall we introduce a ternary_evaluator? // TODO enable vectorization for Select template @@ -957,7 +957,7 @@ struct unary_evaluator > } protected: - const ArgTypeNested m_arg; // FIXME is it OK to store both the argument and its evaluator?? 
(we have the same situation in evaluator_product) + const ArgTypeNested m_arg; evaluator m_argImpl; const variable_if_dynamic m_rows; const variable_if_dynamic m_cols; From aa6b1aebf373fba262fab7cd833881eac4fed8ef Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 15:57:05 +0200 Subject: [PATCH 067/344] Properly implement PartialReduxExpr on top of evaluators, and fix multiple evaluation of nested expression --- Eigen/src/Core/CoreEvaluators.h | 40 +++++++++++++++++++-------------- Eigen/src/Core/VectorwiseOp.h | 23 +++---------------- test/vectorwiseop.cpp | 20 ++++++++++++----- 3 files changed, 40 insertions(+), 43 deletions(-) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 214114ebe..b96ef99fa 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -965,17 +965,16 @@ protected: // -------------------- PartialReduxExpr -------------------- -// -// This is a wrapper around the expression object. -// TODO: Find out how to write a proper evaluator without duplicating -// the row() and col() member functions. template< typename ArgType, typename MemberOp, int Direction> struct evaluator > : evaluator_base > { typedef PartialReduxExpr XprType; - typedef typename XprType::Scalar InputScalar; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + typedef typename ArgType::Scalar InputScalar; + typedef typename XprType::Scalar Scalar; enum { TraversalSize = Direction==int(Vertical) ? 
int(ArgType::RowsAtCompileTime) : int(XprType::ColsAtCompileTime) }; @@ -986,27 +985,34 @@ struct evaluator > Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&HereditaryBits), - Alignment = 0 // FIXME this could be improved + Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized }; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType expr) - : m_expr(expr) + EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) + : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) {} typedef typename XprType::CoeffReturnType CoeffReturnType; - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const - { - return m_expr.coeff(row, col); + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(j)); + else + return m_functor(m_arg.row(i)); } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_expr.coeff(index); + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(index)); + else + return m_functor(m_arg.row(index)); } protected: - const XprType m_expr; + const ArgTypeNested m_arg; + const MemberOp m_functor; }; diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 79c7d135d..5de53732e 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -41,8 +41,6 @@ struct traits > typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; typedef typename MatrixType::Scalar InputScalar; - typedef typename ref_selector::type MatrixTypeNested; - typedef typename remove_all::type _MatrixTypeNested; enum { RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime, ColsAtCompileTime = Direction==Horizontal ? 
1 : MatrixType::ColsAtCompileTime, @@ -62,8 +60,6 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr) - typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; - typedef typename internal::traits::_MatrixTypeNested _MatrixTypeNested; EIGEN_DEVICE_FUNC explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp()) @@ -74,24 +70,11 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define TEST_ENABLE_TEMPORARY_TRACKING #define EIGEN_NO_STATIC_ASSERT #include "main.h" @@ -209,14 +211,20 @@ template void vectorwiseop_matrix(const MatrixType& m) m2 = m1; m2.rowwise().normalize(); VERIFY_IS_APPROX(m2.row(r), m1.row(r).normalized()); + + // test with partial reduction of products + Matrix m1m1 = m1 * m1.transpose(); + VERIFY_IS_APPROX( (m1 * m1.transpose()).colwise().sum(), m1m1.colwise().sum()); + Matrix tmp(rows); + VERIFY_EVALUATION_COUNT( tmp = (m1 * m1.transpose()).colwise().sum(), (MatrixType::RowsAtCompileTime==Dynamic ? 
1 : 0)); } void test_vectorwiseop() { - CALL_SUBTEST_1(vectorwiseop_array(Array22cd())); - CALL_SUBTEST_2(vectorwiseop_array(Array())); - CALL_SUBTEST_3(vectorwiseop_array(ArrayXXf(3, 4))); - CALL_SUBTEST_4(vectorwiseop_matrix(Matrix4cf())); - CALL_SUBTEST_5(vectorwiseop_matrix(Matrix())); - CALL_SUBTEST_6(vectorwiseop_matrix(MatrixXd(7,2))); + CALL_SUBTEST_1( vectorwiseop_array(Array22cd()) ); + CALL_SUBTEST_2( vectorwiseop_array(Array()) ); + CALL_SUBTEST_3( vectorwiseop_array(ArrayXXf(3, 4)) ); + CALL_SUBTEST_4( vectorwiseop_matrix(Matrix4cf()) ); + CALL_SUBTEST_5( vectorwiseop_matrix(Matrix()) ); + CALL_SUBTEST_6( vectorwiseop_matrix(MatrixXd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } From 412c049ba47eedc45f0943db7094bf5308c681c2 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 16:27:54 +0200 Subject: [PATCH 068/344] Fix a warning --- test/geo_alignedbox.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp index c883234a8..2bdb4b7f2 100644 --- a/test/geo_alignedbox.cpp +++ b/test/geo_alignedbox.cpp @@ -16,7 +16,7 @@ using namespace std; template EIGEN_DONT_INLINE -void kill_extra_precision(T& x) { eigen_assert(&x != 0); } +void kill_extra_precision(T& x) { eigen_assert((void*)(&x) != (void*)0); } template void alignedbox(const BoxType& _box) From 67bfba07fde58e0cc983486f49e79fc29fbeee49 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 16:30:28 +0200 Subject: [PATCH 069/344] Fix some CUDA issues --- Eigen/src/Core/VectorwiseOp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 5de53732e..dbc272dae 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -70,7 +70,10 @@ class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr class VectorwiseOp * Output: \verbinclude 
PartialRedux_norm.out * * \sa DenseBase::norm() */ - EIGEN_DEVICE_FUNC template + EIGEN_DEVICE_FUNC const typename LpNormReturnType

::Type lpNorm() const { return typename LpNormReturnType

::Type(_expression()); } From f6f6f50272164a523126f5371ac51a2c8f5bae8e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 16:34:33 +0200 Subject: [PATCH 070/344] Clean evaluator --- Eigen/src/Core/CoreEvaluators.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index b96ef99fa..74908bd45 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -1337,20 +1337,16 @@ struct evaluator > typedef evaluator Base; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) + : m_result(xpr.arg()) { ::new (static_cast(this)) Base(m_result); - // TODO we should simply do m_result(xpr.arg()); - call_dense_assignment_loop(m_result, xpr.arg()); } // This constructor is used when nesting an EvalTo evaluator in another evaluator EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) - : m_result(arg.rows(), arg.cols()) + : m_result(arg) { ::new (static_cast(this)) Base(m_result); - // TODO we should simply do m_result(xpr.arg()); - call_dense_assignment_loop(m_result, arg); } protected: From dd934ad057e5ad5a5bc9e9a2b4340b4ac16c01d3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 17:27:01 +0200 Subject: [PATCH 071/344] Re-enable vectorization of LinSpaced, plus some cleaning --- Eigen/src/Core/CoreEvaluators.h | 4 ++-- Eigen/src/Core/GeneralProduct.h | 14 +++++++------- Eigen/src/Core/Inverse.h | 2 -- Eigen/src/Core/ProductEvaluators.h | 2 +- Eigen/src/Core/functors/NullaryFunctors.h | 3 --- test/nullary.cpp | 2 ++ 6 files changed, 12 insertions(+), 15 deletions(-) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 74908bd45..c0563f534 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -321,8 +321,8 @@ struct evaluator > & ( HereditaryBits | (functor_has_linear_access::ret ? LinearAccessBit : 0) | (functor_traits::PacketAccess ? 
PacketAccessBit : 0))) - | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), // FIXME EvalBeforeNestingBit should be needed anymore - Alignment = 0 // FIXME alignment should not matter here, perhaps we could set it to AlignMax?? + | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), + Alignment = AlignedMax }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 475d6f4aa..4a5054592 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -160,7 +160,7 @@ template<> struct product_type_selector { enum namespace internal { template -struct gemv_dense_sense_selector; +struct gemv_dense_selector; } // end namespace internal @@ -204,19 +204,19 @@ struct gemv_static_vector_if // The vector is on the left => transposition template -struct gemv_dense_sense_selector +struct gemv_dense_selector { template static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { Transpose destT(dest); enum { OtherStorageOrder = StorageOrder == RowMajor ? 
ColMajor : RowMajor }; - gemv_dense_sense_selector + gemv_dense_selector ::run(rhs.transpose(), lhs.transpose(), destT, alpha); } }; -template<> struct gemv_dense_sense_selector +template<> struct gemv_dense_selector { template static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) @@ -292,7 +292,7 @@ template<> struct gemv_dense_sense_selector } }; -template<> struct gemv_dense_sense_selector +template<> struct gemv_dense_selector { template static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) @@ -345,7 +345,7 @@ template<> struct gemv_dense_sense_selector } }; -template<> struct gemv_dense_sense_selector +template<> struct gemv_dense_selector { template static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) @@ -357,7 +357,7 @@ template<> struct gemv_dense_sense_selector } }; -template<> struct gemv_dense_sense_selector +template<> struct gemv_dense_selector { template static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h index b359e1287..8e6b275f7 100644 --- a/Eigen/src/Core/Inverse.h +++ b/Eigen/src/Core/Inverse.h @@ -12,8 +12,6 @@ namespace Eigen { -// TODO move the general declaration in Core, and rename this file DenseInverseImpl.h, or something like this... 
- template class InverseImpl; namespace internal { diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 529870300..c2a7314bd 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -313,7 +313,7 @@ struct generic_product_impl template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - internal::gemv_dense_sense_selector::HasUsableDirectAccess) >::run(lhs, rhs, dst, alpha); diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 130f20868..55d45f26f 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -26,7 +26,6 @@ struct scalar_constant_op { }; template struct functor_traits > -// FIXME replace this packet test by a safe one { enum { Cost = 1, PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; template struct scalar_identity_op { @@ -135,14 +134,12 @@ template struct linspa // This proxy object handles the actual required temporaries, the different // implementations (random vs. sequential access) as well as the // correct piping to size 2/4 packet operations. - // TODO find a way to make the packet type configurable const linspaced_op_impl impl; }; // all functors allow linear access, except scalar_identity_op. So we fix here a quick meta // to indicate whether a functor allows linear access, just always answering 'yes' except for // scalar_identity_op. 
-// FIXME move this to functor_traits adding a functor_default template struct functor_has_linear_access { enum { ret = 1 }; }; template struct functor_has_linear_access > { enum { ret = 0 }; }; diff --git a/test/nullary.cpp b/test/nullary.cpp index 2c148e205..4844f2952 100644 --- a/test/nullary.cpp +++ b/test/nullary.cpp @@ -123,6 +123,8 @@ void test_nullary() CALL_SUBTEST_6( testVectorType(Vector3d()) ); CALL_SUBTEST_7( testVectorType(VectorXf(internal::random(1,300))) ); CALL_SUBTEST_8( testVectorType(Vector3f()) ); + CALL_SUBTEST_8( testVectorType(Vector4f()) ); + CALL_SUBTEST_8( testVectorType(Matrix()) ); CALL_SUBTEST_8( testVectorType(Matrix()) ); } } From 8d00a953af6d84413abd0c8941db6666f4e0bf4e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 17:36:57 +0200 Subject: [PATCH 072/344] Fix a nesting issue in some matrix-vector cases. --- Eigen/src/Core/GeneralProduct.h | 9 +++++---- test/product_notemporary.cpp | 5 +++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 4a5054592..fe8204ac3 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -350,10 +350,11 @@ template<> struct gemv_dense_selector template static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - // TODO makes sure dest is sequentially stored in memory, otherwise use a temp + // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp + typename nested_eval::type actual_rhs(rhs); const Index size = rhs.rows(); for(Index k=0; k struct gemv_dense_selector template static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp + typename nested_eval::type actual_rhs(rhs); const Index rows = dest.rows(); for(Index i=0; i void 
product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.col(0), 0 ); VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * rv1.adjoint(), 0 ); VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.row(0).transpose(), 0 ); + + VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * cv1, 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * cv1, 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * (m1*cv1), 1 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * (m1*cv1), 1 ); } void test_product_notemporary() From d866279364414235b531ab9c03eda6a14d9da1c5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 18:36:39 +0200 Subject: [PATCH 073/344] Clean a bit the implementation of inverse permutations --- Eigen/Core | 4 +- Eigen/src/Core/Inverse.h | 3 +- Eigen/src/Core/PermutationMatrix.h | 93 +++++++++--------------- Eigen/src/Core/ProductEvaluators.h | 8 +- Eigen/src/SparseCore/SparsePermutation.h | 17 ++--- 5 files changed, 50 insertions(+), 75 deletions(-) diff --git a/Eigen/Core b/Eigen/Core index 713d18a6d..7cf431320 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -382,8 +382,6 @@ using std::ptrdiff_t; #include "src/Core/DiagonalMatrix.h" #include "src/Core/Diagonal.h" #include "src/Core/DiagonalProduct.h" -#include "src/Core/PermutationMatrix.h" -#include "src/Core/Transpositions.h" #include "src/Core/Redux.h" #include "src/Core/Visitor.h" #include "src/Core/Fuzzy.h" @@ -393,6 +391,8 @@ using std::ptrdiff_t; #include "src/Core/GeneralProduct.h" #include "src/Core/Solve.h" #include "src/Core/Inverse.h" +#include "src/Core/PermutationMatrix.h" +#include "src/Core/Transpositions.h" #include "src/Core/TriangularMatrix.h" #include "src/Core/SelfAdjointView.h" #include "src/Core/products/GeneralBlockPanelKernel.h" diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h index 8e6b275f7..8ba1a12d9 100644 --- a/Eigen/src/Core/Inverse.h +++ b/Eigen/src/Core/Inverse.h @@ -47,11 +47,12 @@ public: typedef 
typename XprType::PlainObject PlainObject; typedef typename internal::ref_selector::type XprTypeNested; typedef typename internal::remove_all::type XprTypeNestedCleaned; + typedef typename internal::ref_selector::type Nested; explicit Inverse(const XprType &xpr) : m_xpr(xpr) {} - + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index b5f7e4a54..90e1df233 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2009 Benoit Jacob -// Copyright (C) 2009-2011 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -13,9 +13,6 @@ namespace Eigen { -// TODO: this does not seems to be needed at all: -// template class PermutedImpl; - /** \class PermutationBase * \ingroup Core_Module * @@ -67,8 +64,9 @@ class PermutationBase : public EigenBase DenseMatrixType; typedef PermutationMatrix PlainPermutationType; + typedef PlainPermutationType PlainObject; using Base::derived; - typedef Transpose TransposeReturnType; + typedef Inverse InverseReturnType; typedef void Scalar; #endif @@ -196,14 +194,14 @@ class PermutationBase : public EigenBase * * \note \note_try_to_help_rvo */ - inline TransposeReturnType inverse() const - { return TransposeReturnType(derived()); } + inline InverseReturnType inverse() const + { return InverseReturnType(derived()); } /** \returns the tranpose permutation matrix. 
* * \note \note_try_to_help_rvo */ - inline TransposeReturnType transpose() const - { return TransposeReturnType(derived()); } + inline InverseReturnType transpose() const + { return InverseReturnType(derived()); } /**** multiplication helpers to hopefully get RVO ****/ @@ -238,7 +236,7 @@ class PermutationBase : public EigenBase * \note \note_try_to_help_rvo */ template - inline PlainPermutationType operator*(const Transpose >& other) const + inline PlainPermutationType operator*(const InverseImpl& other) const { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); } /** \returns the product of an inverse permutation with another permutation. @@ -246,7 +244,7 @@ class PermutationBase : public EigenBase * \note \note_try_to_help_rvo */ template friend - inline PlainPermutationType operator*(const Transpose >& other, const PermutationBase& perm) + inline PlainPermutationType operator*(const InverseImpl& other, const PermutationBase& perm) { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); } /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation. 
@@ -398,13 +396,13 @@ class PermutationMatrix : public PermutationBase - PermutationMatrix(const Transpose >& other) - : m_indices(other.nestedExpression().size()) + PermutationMatrix(const InverseImpl& other) + : m_indices(other.derived().nestedExpression().size()) { eigen_internal_assert(m_indices.size() <= NumTraits::highest()); StorageIndex end = StorageIndex(m_indices.size()); for (StorageIndex i=0; i PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) @@ -564,84 +562,61 @@ operator*(const PermutationBase &permutation, (permutation.derived(), matrix.derived()); } -namespace internal { -/* Template partial specialization for transposed/inverse permutations */ - -template -struct traits > > - : traits -{}; - -} // end namespace internal - -// TODO: the specificties should be handled by the evaluator, -// at the very least we should only specialize TransposeImpl -template -class Transpose > - : public EigenBase > > +template +class InverseImpl + : public EigenBase > { - typedef Derived PermutationType; - typedef typename PermutationType::IndicesType IndicesType; typedef typename PermutationType::PlainPermutationType PlainPermutationType; + typedef internal::traits PermTraits; + protected: + InverseImpl() {} public: + typedef Inverse InverseType; + using EigenBase >::derived; #ifndef EIGEN_PARSED_BY_DOXYGEN - typedef internal::traits Traits; - typedef typename Derived::DenseMatrixType DenseMatrixType; + typedef typename PermutationType::DenseMatrixType DenseMatrixType; enum { - Flags = Traits::Flags, - RowsAtCompileTime = Traits::RowsAtCompileTime, - ColsAtCompileTime = Traits::ColsAtCompileTime, - MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, - MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + RowsAtCompileTime = PermTraits::RowsAtCompileTime, + ColsAtCompileTime = PermTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime }; - typedef 
typename Traits::Scalar Scalar; - typedef typename Traits::StorageIndex StorageIndex; #endif - Transpose(const PermutationType& p) : m_permutation(p) {} - - inline Index rows() const { return m_permutation.rows(); } - inline Index cols() const { return m_permutation.cols(); } - #ifndef EIGEN_PARSED_BY_DOXYGEN template void evalTo(MatrixBase& other) const { other.setZero(); - for (Index i=0; i friend - const Product - operator*(const MatrixBase& matrix, const Transpose& trPerm) + const Product + operator*(const MatrixBase& matrix, const InverseType& trPerm) { - return Product(matrix.derived(), trPerm.derived()); + return Product(matrix.derived(), trPerm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. */ template - const Product + const Product operator*(const MatrixBase& matrix) const { - return Product(*this, matrix.derived()); + return Product(derived(), matrix.derived()); } - - const PermutationType& nestedExpression() const { return m_permutation; } - - protected: - const PermutationType& m_permutation; }; template diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index c2a7314bd..7dea15a30 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -908,20 +908,20 @@ struct generic_product_impl }; template -struct generic_product_impl, Rhs, PermutationShape, MatrixShape, ProductTag> +struct generic_product_impl, Rhs, PermutationShape, MatrixShape, ProductTag> { template - static void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) + static void evalTo(Dest& dst, const Inverse& lhs, const Rhs& rhs) { permutation_matrix_product::run(dst, lhs.nestedExpression(), rhs); } }; template -struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> +struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> { template - static void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) + static void evalTo(Dest& dst, const Lhs& lhs, 
const Inverse& rhs) { permutation_matrix_product::run(dst, rhs.nestedExpression(), lhs); } diff --git a/Eigen/src/SparseCore/SparsePermutation.h b/Eigen/src/SparseCore/SparsePermutation.h index d63607b6c..3c58e3b4f 100644 --- a/Eigen/src/SparseCore/SparsePermutation.h +++ b/Eigen/src/SparseCore/SparsePermutation.h @@ -144,23 +144,22 @@ operator*( const PermutationBase& perm, const SparseMatrixBase(perm.derived(), matrix.derived()); } -// TODO, the following specializations should not be needed as Transpose should be a PermutationBase. /** \returns the matrix with the inverse permutation applied to the columns. */ -template -inline const Product > > -operator*(const SparseMatrixBase& matrix, const Transpose >& tperm) +template +inline const Product > +operator*(const SparseMatrixBase& matrix, const InverseImpl& tperm) { - return Product > >(matrix.derived(), tperm); + return Product >(matrix.derived(), tperm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. 
*/ -template -inline const Product >, SparseDerived> -operator*(const Transpose >& tperm, const SparseMatrixBase& matrix) +template +inline const Product, SparseDerived> +operator*(const InverseImpl& tperm, const SparseMatrixBase& matrix) { - return Product >, SparseDerived>(tperm, matrix.derived()); + return Product, SparseDerived>(tperm.derived(), matrix.derived()); } } // end namespace Eigen From 4140ee039d1b9adc7ec0e747e466ee073d239d29 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 21:41:27 +0200 Subject: [PATCH 074/344] Fix propagation of AssumeAliasing for expression as: "scalar * (A*B)" --- Eigen/src/Core/ProductEvaluators.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 7dea15a30..a40113325 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -38,6 +38,12 @@ struct evaluator > // Catch scalar * ( A * B ) and transform it to (A*scalar) * B // TODO we should apply that rule only if that's really helpful template +struct evaluator_traits, const Product > > + : evaluator_traits_base, const Product > > +{ + enum { AssumeAliasing = 1 }; +}; +template struct evaluator, const Product > > : public evaluator,const Lhs>, Rhs, DefaultProduct> > { From c9718514f59844f1811685c6703389fc95195864 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 21:41:53 +0200 Subject: [PATCH 075/344] Fix nesting sub-expression in outer-products --- Eigen/src/Core/ProductEvaluators.h | 12 ++++++------ test/product_notemporary.cpp | 13 ++++++++++++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index a40113325..7099d1fdc 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -218,12 +218,12 @@ template EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& 
func, const false_type&) { evaluator rhsEval(rhs); - // FIXME make sure lhs is sequentially stored + typename nested_eval::type actual_lhs(lhs); + // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored // FIXME not very good if rhs is real and lhs complex while alpha is real too - // FIXME we should probably build an evaluator for dst const Index cols = dst.cols(); for (Index j=0; j EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) { evaluator lhsEval(lhs); - // FIXME make sure rhs is sequentially stored + typename nested_eval::type actual_rhs(rhs); + // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored // FIXME not very good if lhs is real and rhs complex while alpha is real too - // FIXME we should probably build an evaluator for dst const Index rows = dst.rows(); for (Index i=0; i diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp index 94b5f712d..16f6266f7 100644 --- a/test/product_notemporary.cpp +++ b/test/product_notemporary.cpp @@ -62,7 +62,7 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( m3.noalias() -= (s1 * m1).template triangularView() * m2, 0); VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView() * (m2+m2), 1); VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView() * m2.adjoint(), 0); - + VERIFY_EVALUATION_COUNT( m3.template triangularView() = (m1 * m2.adjoint()), 0); VERIFY_EVALUATION_COUNT( m3.template triangularView() -= (m1 * m2.adjoint()), 0); @@ -112,6 +112,17 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * cv1, 0 ); VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * (m1*cv1), 1 ); VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * (m1*cv1), 1 ); + + // Check outer products + m3 = 
cv1 * rv1; + VERIFY_EVALUATION_COUNT( m3.noalias() = cv1 * rv1, 0 ); + VERIFY_EVALUATION_COUNT( m3.noalias() = (cv1+cv1) * (rv1+rv1), 1 ); + VERIFY_EVALUATION_COUNT( m3.noalias() = (m1*cv1) * (rv1), 1 ); + VERIFY_EVALUATION_COUNT( m3.noalias() += (m1*cv1) * (rv1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (cv1) * (rv1 * m1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() -= (cv1) * (rv1 * m1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (m1*cv1) * (rv1 * m1), 2 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() += (m1*cv1) * (rv1 * m1), 2 ); } void test_product_notemporary() From 186ec1437cc59414ff7539cb06637c8de2be2ee9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 8 Oct 2015 22:06:49 +0200 Subject: [PATCH 076/344] Cleanup EIGEN_SPARSE_PUBLIC_INTERFACE, it is now a simple alias to EIGEN_GENERIC_PUBLIC_INTERFACE --- Eigen/src/Core/util/Macros.h | 9 ++++---- Eigen/src/SparseCore/SparseBlock.h | 11 ++++++--- Eigen/src/SparseCore/SparseCompressedBase.h | 2 +- Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 1 + Eigen/src/SparseCore/SparseMap.h | 4 ++-- Eigen/src/SparseCore/SparseMatrix.h | 5 ++-- Eigen/src/SparseCore/SparseRef.h | 6 ++--- Eigen/src/SparseCore/SparseTriangularView.h | 3 ++- Eigen/src/SparseCore/SparseUtil.h | 23 ++++--------------- Eigen/src/SparseCore/SparseVector.h | 4 ++-- Eigen/src/SparseCore/SparseView.h | 1 + .../src/SparseExtra/DynamicSparseMatrix.h | 2 ++ 12 files changed, 34 insertions(+), 37 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index abc69f866..f9741e5c6 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -761,16 +761,17 @@ namespace Eigen { Flags = Eigen::internal::traits::Flags, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \ - IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; + IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \ + using Base::derived; \ + using Base::const_cast_derived; 
#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ typedef typename Base::PacketScalar PacketScalar; \ enum { MaxRowsAtCompileTime = Eigen::internal::traits::MaxRowsAtCompileTime, \ - MaxColsAtCompileTime = Eigen::internal::traits::MaxColsAtCompileTime}; \ - using Base::derived; \ - using Base::const_cast_derived; + MaxColsAtCompileTime = Eigen::internal::traits::MaxColsAtCompileTime}; + #define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b) #define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b) diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index 202bf9a12..10be84856 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h @@ -23,6 +23,8 @@ public: enum { IsRowMajor = internal::traits::IsRowMajor }; protected: enum { OuterSize = IsRowMajor ? BlockRows : BlockCols }; + typedef SparseMatrixBase Base; + using Base::convert_index; public: EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) @@ -88,10 +90,11 @@ class sparse_matrix_block_impl { typedef typename internal::remove_all::type _MatrixTypeNested; typedef Block BlockType; + typedef SparseCompressedBase > Base; + using Base::convert_index; public: enum { IsRowMajor = internal::traits::IsRowMajor }; - typedef SparseCompressedBase > Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) + EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) protected: typedef typename Base::IndexVector IndexVector; enum { OuterSize = IsRowMajor ? 
BlockRows : BlockCols }; @@ -359,7 +362,9 @@ template class BlockImpl : public SparseMatrixBase >, internal::no_assignment_operator { - typedef Block BlockType; + typedef Block BlockType; + typedef SparseMatrixBase Base; + using Base::convert_index; public: enum { IsRowMajor = internal::traits::IsRowMajor }; EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index ad3085a55..97ca44761 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -28,7 +28,7 @@ class SparseCompressedBase { public: typedef SparseMatrixBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseCompressedBase) + EIGEN_SPARSE_PUBLIC_INTERFACE(SparseCompressedBase) using Base::operator=; using Base::IsRowMajor; diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index 973b80095..b87b6b749 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -35,6 +35,7 @@ class CwiseBinaryOpImpl { public: typedef CwiseBinaryOp Derived; + typedef SparseMatrixBase Base; EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) CwiseBinaryOpImpl() { diff --git a/Eigen/src/SparseCore/SparseMap.h b/Eigen/src/SparseCore/SparseMap.h index 349927905..36c09ab0c 100644 --- a/Eigen/src/SparseCore/SparseMap.h +++ b/Eigen/src/SparseCore/SparseMap.h @@ -191,7 +191,7 @@ class Map, Options, StrideType> { public: typedef SparseMapBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(Map) + EIGEN_SPARSE_PUBLIC_INTERFACE(Map) enum { IsRowMajor = Base::IsRowMajor }; public: @@ -211,7 +211,7 @@ class Map, Options, StrideType { public: typedef SparseMapBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(Map) + EIGEN_SPARSE_PUBLIC_INTERFACE(Map) enum { IsRowMajor = Base::IsRowMajor }; public: diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 14b196480..4cf3145fd 100644 --- 
a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -92,11 +92,12 @@ template class SparseMatrix : public SparseCompressedBase > { - public: typedef SparseCompressedBase Base; + using Base::convert_index; + public: using Base::isCompressed; using Base::nonZeros; - _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix) + EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix) using Base::operator+=; using Base::operator-=; diff --git a/Eigen/src/SparseCore/SparseRef.h b/Eigen/src/SparseCore/SparseRef.h index 262fd9aef..e10bf6878 100644 --- a/Eigen/src/SparseCore/SparseRef.h +++ b/Eigen/src/SparseCore/SparseRef.h @@ -58,7 +58,7 @@ template class SparseRefBase public: typedef SparseMapBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseRefBase) + EIGEN_SPARSE_PUBLIC_INTERFACE(SparseRefBase) SparseRefBase() : Base(RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime, 0, 0, 0, 0, 0) @@ -104,7 +104,7 @@ class Ref, Options, StrideType > public: typedef internal::SparseRefBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) + EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -148,7 +148,7 @@ class Ref, Options, StrideType public: typedef internal::SparseRefBase Base; - _EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) + EIGEN_SPARSE_PUBLIC_INTERFACE(Ref) template inline Ref(const SparseMatrixBase& expr) diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h index 3d9946149..57d88893e 100644 --- a/Eigen/src/SparseCore/SparseTriangularView.h +++ b/Eigen/src/SparseCore/SparseTriangularView.h @@ -34,10 +34,11 @@ template class TriangularViewImpl TriangularViewType; -protected: + protected: // dummy solve function to make TriangularView happy. 
void solve() const; + typedef SparseMatrixBase Base; public: EIGEN_SPARSE_PUBLIC_INTERFACE(TriangularViewType) diff --git a/Eigen/src/SparseCore/SparseUtil.h b/Eigen/src/SparseCore/SparseUtil.h index d53a9cb17..75c604f79 100644 --- a/Eigen/src/SparseCore/SparseUtil.h +++ b/Eigen/src/SparseCore/SparseUtil.h @@ -39,26 +39,11 @@ EIGEN_STRONG_INLINE Derived& operator Op(const Other& scalar) \ #define EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATORS(Derived) \ EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =) -// TODO this is mostly the same as EIGEN_GENERIC_PUBLIC_INTERFACE -#define _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \ - typedef typename Eigen::internal::traits::Scalar Scalar; \ - typedef typename Eigen::NumTraits::Real RealScalar; \ - typedef typename Eigen::internal::ref_selector::type Nested; \ - typedef typename Eigen::internal::traits::StorageKind StorageKind; \ - typedef typename Eigen::internal::traits::StorageIndex StorageIndex; \ - enum { RowsAtCompileTime = Eigen::internal::traits::RowsAtCompileTime, \ - ColsAtCompileTime = Eigen::internal::traits::ColsAtCompileTime, \ - Flags = Eigen::internal::traits::Flags, \ - SizeAtCompileTime = Base::SizeAtCompileTime, \ - IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \ - using Base::derived; \ - using Base::const_cast_derived; \ - using Base::convert_index; - -#define EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \ - typedef Eigen::SparseMatrixBase Base; \ - _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) +#define EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \ + EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) + + const int CoherentAccessPattern = 0x1; const int InnerRandomAccessPattern = 0x2 | CoherentAccessPattern; const int OuterRandomAccessPattern = 0x4 | CoherentAccessPattern; diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index 38b5a53df..f941fa5e1 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -66,9 +66,9 @@ class SparseVector : public 
SparseCompressedBase > { typedef SparseCompressedBase Base; - + using Base::convert_index; public: - _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseVector) + EIGEN_SPARSE_PUBLIC_INTERFACE(SparseVector) EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseVector, +=) EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseVector, -=) diff --git a/Eigen/src/SparseCore/SparseView.h b/Eigen/src/SparseCore/SparseView.h index 761e72038..c945c4dab 100644 --- a/Eigen/src/SparseCore/SparseView.h +++ b/Eigen/src/SparseCore/SparseView.h @@ -32,6 +32,7 @@ class SparseView : public SparseMatrixBase > { typedef typename MatrixType::Nested MatrixTypeNested; typedef typename internal::remove_all::type _MatrixTypeNested; + typedef SparseMatrixBase Base; public: EIGEN_SPARSE_PUBLIC_INTERFACE(SparseView) typedef typename internal::remove_all::type NestedExpression; diff --git a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h index 54e0c5d63..037a13f86 100644 --- a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +++ b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h @@ -56,6 +56,8 @@ template class DynamicSparseMatrix : public SparseMatrixBase > { + typedef SparseMatrixBase Base; + using Base::convert_index; public: EIGEN_SPARSE_PUBLIC_INTERFACE(DynamicSparseMatrix) // FIXME: why are these operator already alvailable ??? From 1932a2476094eb148adf64715bcd9ecf9f4af062 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 10:21:54 +0200 Subject: [PATCH 077/344] Simplify EIGEN_DENSE_PUBLIC_INTERFACE --- Eigen/src/Core/util/Macros.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index f9741e5c6..bf894a518 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -747,8 +747,6 @@ namespace Eigen { * documentation in a single line. 
**/ -// TODO The EIGEN_DENSE_PUBLIC_INTERFACE should not exists anymore - #define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ typedef typename Eigen::internal::traits::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex. */ \ typedef typename Eigen::NumTraits::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex, T were corresponding to RealScalar. */ \ @@ -766,11 +764,10 @@ namespace Eigen { using Base::const_cast_derived; +// FIXME Maybe the EIGEN_DENSE_PUBLIC_INTERFACE could be removed as importing PacketScalar is rarely needed #define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ - typedef typename Base::PacketScalar PacketScalar; \ - enum { MaxRowsAtCompileTime = Eigen::internal::traits::MaxRowsAtCompileTime, \ - MaxColsAtCompileTime = Eigen::internal::traits::MaxColsAtCompileTime}; + typedef typename Base::PacketScalar PacketScalar; #define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b) From c2d68b984f4d02941405d47a2102ed2d0af03437 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 10:34:58 +0200 Subject: [PATCH 078/344] Optimize a bit complex selfadjoint * vector product. 
--- Eigen/src/Core/products/SelfadjointMatrixVector.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index f3443bd10..c743db011 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -44,6 +44,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product::type Packet; + typedef typename NumTraits::Real RealScalar; const Index PacketSize = sizeof(Packet)/sizeof(Scalar); enum { @@ -54,7 +55,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; - conj_helper::IsComplex, ConjugateRhs> cjd; + conj_helper cjd; conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0; conj_helper::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1; @@ -97,7 +98,6 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product Date: Fri, 9 Oct 2015 10:42:14 +0200 Subject: [PATCH 079/344] Remove dead code in selfadjoint_matrix_vector_product --- .../src/Core/products/SelfadjointMatrixVector.h | 16 +++------------- blas/level2_cplx_impl.h | 4 ++-- blas/level2_real_impl.h | 4 ++-- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index c743db011..d8d30267e 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -30,7 +30,7 @@ struct selfadjoint_matrix_vector_product static EIGEN_DONT_INLINE void run( Index size, const Scalar* lhs, Index lhsStride, - const Scalar* _rhs, Index rhsIncr, + const Scalar* rhs, Scalar* res, Scalar alpha); }; @@ -39,7 +39,7 @@ template::run( Index size, const Scalar* lhs, 
Index lhsStride, - const Scalar* _rhs, Index rhsIncr, + const Scalar* rhs, Scalar* res, Scalar alpha) { @@ -62,16 +62,6 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product(_rhs) : 0); - if (rhsIncr!=1) - { - const Scalar* it = _rhs; - for (Index i=0; i ( lhs.rows(), // size &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info - actualRhsPtr, 1, // rhs info + actualRhsPtr, // rhs info actualDestPtr, // result info actualAlpha // scale factor ); diff --git a/blas/level2_cplx_impl.h b/blas/level2_cplx_impl.h index afa9a7493..9b845de22 100644 --- a/blas/level2_cplx_impl.h +++ b/blas/level2_cplx_impl.h @@ -18,7 +18,7 @@ */ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { - typedef void (*functype)(int, const Scalar*, int, const Scalar*, int, Scalar*, Scalar); + typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); static functype func[2]; static bool init = false; @@ -67,7 +67,7 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa if(code>=2 || func[code]==0) return 0; - func[code](*n, a, *lda, actual_x, 1, actual_y, alpha); + func[code](*n, a, *lda, actual_x, actual_y, alpha); } if(actual_x!=x) delete[] actual_x; diff --git a/blas/level2_real_impl.h b/blas/level2_real_impl.h index 9722a4674..cac89b268 100644 --- a/blas/level2_real_impl.h +++ b/blas/level2_real_impl.h @@ -12,7 +12,7 @@ // y = alpha*A*x + beta*y int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { - typedef void (*functype)(int, const Scalar*, int, const Scalar*, int, Scalar*, Scalar); + typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); static functype func[2]; static bool init = false; @@ -59,7 +59,7 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, 
RealScalar *p if(code>=2 || func[code]==0) return 0; - func[code](*n, a, *lda, actual_x, 1, actual_y, alpha); + func[code](*n, a, *lda, actual_x, actual_y, alpha); if(actual_x!=x) delete[] actual_x; if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy); From 2c516ba38f86ce04c15f9cf2472638e619528b20 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 12:07:06 +0200 Subject: [PATCH 080/344] Remove auto references and referenced-by relation in doc. --- doc/Doxyfile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index e15ba84bd..e0c6a7e34 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -867,13 +867,13 @@ STRIP_CODE_COMMENTS = YES # then for each documented function all documented # functions referencing it will be listed. -REFERENCED_BY_RELATION = YES +REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. 
-REFERENCES_RELATION = YES +REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from From 72bd05b6d8240b60e294397ac02a13ad53ae6167 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 12:07:42 +0200 Subject: [PATCH 081/344] Cleaning in Redux.h --- Eigen/src/Core/Redux.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index c427a4d58..87b4a9c46 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -414,17 +414,7 @@ typename internal::traits::Scalar DenseBase::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); - - // FIXME, eval_nest should be handled by redux_evaluator, however: - // - it is currently difficult to provide the right Flags since they are still handled by the expressions - // - handling it here might reduce the number of template instantiations -// typedef typename internal::nested_eval::type ThisNested; -// typedef typename internal::remove_all::type ThisNestedCleaned; -// typedef typename internal::redux_evaluator ThisEvaluator; -// -// ThisNested thisNested(derived()); -// ThisEvaluator thisEval(thisNested); - + typedef typename internal::redux_evaluator ThisEvaluator; ThisEvaluator thisEval(derived()); From 1429daf85087a6c913a8274fd4c6827f9eb57aef Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 12:10:48 +0200 Subject: [PATCH 082/344] Add lvalue check for TriangularView::swap, and fix deprecated TriangularView::lazyAssign --- Eigen/src/Core/TriangularMatrix.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 63a1af8c1..e9b34ebdf 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -460,6 +460,7 @@ template class 
TriangularViewImpl<_Mat EIGEN_DEVICE_FUNC void swap(TriangularBase const & other) { + EIGEN_STATIC_ASSERT_LVALUE(OtherDerived); call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); } @@ -468,6 +469,7 @@ template class TriangularViewImpl<_Mat EIGEN_DEVICE_FUNC void swap(MatrixBase const & other) { + EIGEN_STATIC_ASSERT_LVALUE(OtherDerived); call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); } @@ -503,7 +505,7 @@ template template void TriangularViewImpl::lazyAssign(const MatrixBase& other) { - internal::call_assignment(derived().noalias(), other.template triangularView()); + internal::call_assignment_no_alias(derived(), other.template triangularView()); } @@ -523,7 +525,7 @@ template void TriangularViewImpl::lazyAssign(const TriangularBase& other) { eigen_assert(Mode == int(OtherDerived::Mode)); - internal::call_assignment(derived().noalias(), other.derived()); + internal::call_assignment_no_alias(derived(), other.derived()); } /*************************************************************************** From 2632b3446cf687e9e3feff2850d53f7928837474 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 12:10:58 +0200 Subject: [PATCH 083/344] Improve documentation of TriangularView. 
--- Eigen/src/Core/SolveTriangular.h | 28 ----------- Eigen/src/Core/TriangularMatrix.h | 81 ++++++++++++++++++++++++++----- Eigen/src/Core/util/Constants.h | 4 +- 3 files changed, 71 insertions(+), 42 deletions(-) diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index ded42e0e8..f8bb4b314 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -161,13 +161,6 @@ struct triangular_solver_selector { * TriangularView methods ***************************************************************************/ -/** "in-place" version of TriangularView::solve() where the result is written in \a other - * - * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here. - * This function will const_cast it, so constness isn't honored here. - * - * See TriangularView:solve() for the details. - */ template template void TriangularViewImpl::solveInPlace(const MatrixBase& _other) const @@ -188,27 +181,6 @@ void TriangularViewImpl::solveInPlace(const MatrixBase template const internal::triangular_solve_retval,Other> diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index e9b34ebdf..438dd4dc9 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -222,18 +222,23 @@ template class TriangularView TriangularView& operator=(const TriangularView &other) { return Base::operator=(other); } + /** \copydoc EigenBase::rows() */ EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + /** \copydoc EigenBase::cols() */ EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } + /** \returns a const reference to the nested expression */ EIGEN_DEVICE_FUNC const NestedExpression& nestedExpression() const { return m_matrix; } + + /** \returns a reference to the nested expression */ EIGEN_DEVICE_FUNC NestedExpression& nestedExpression() { return *const_cast(&m_matrix); } - /** \sa 
MatrixBase::conjugate() const */ typedef TriangularView ConjugateReturnType; + /** \sa MatrixBase::conjugate() const */ EIGEN_DEVICE_FUNC inline const ConjugateReturnType conjugate() const { return ConjugateReturnType(m_matrix.conjugate()); } @@ -279,19 +284,28 @@ template class TriangularView using Base::solve; #endif - EIGEN_DEVICE_FUNC - const SelfAdjointView selfadjointView() const - { - EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR); - return SelfAdjointView(m_matrix); - } + /** \returns a selfadjoint view of the referenced triangular part which must be either \c #Upper or \c #Lower. + * + * This is a shortcut for \code this->nestedExpression().selfadjointView<(*this)::Mode>() \endcode + * \sa MatrixBase::selfadjointView() */ EIGEN_DEVICE_FUNC SelfAdjointView selfadjointView() { - EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR); + EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR); return SelfAdjointView(m_matrix); } + /** This is the const version of selfadjointView() */ + EIGEN_DEVICE_FUNC + const SelfAdjointView selfadjointView() const + { + EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR); + return SelfAdjointView(m_matrix); + } + + + /** \returns the determinant of the triangular matrix + * \sa MatrixBase::determinant() */ EIGEN_DEVICE_FUNC Scalar determinant() const { @@ -341,12 +355,16 @@ template class TriangularViewImpl<_Mat Flags = internal::traits::Flags }; + /** \returns the outer-stride of the underlying dense matrix + * \sa DenseCoeffsBase::outerStride() */ EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); } + /** \returns the inner-stride of the underlying dense matrix + * \sa DenseCoeffsBase::innerStride() */ EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); } - /** \sa MatrixBase::operator+=() */ + /** \sa MatrixBase::operator+=() */ template EIGEN_DEVICE_FUNC TriangularViewType& 
operator+=(const DenseBase& other) { @@ -364,7 +382,7 @@ template class TriangularViewImpl<_Mat /** \sa MatrixBase::operator*=() */ EIGEN_DEVICE_FUNC TriangularViewType& operator*=(const typename internal::traits::Scalar& other) { return *this = derived().nestedExpression() * other; } - /** \sa MatrixBase::operator/=() */ + /** \sa DenseBase::operator/=() */ EIGEN_DEVICE_FUNC TriangularViewType& operator/=(const typename internal::traits::Scalar& other) { return *this = derived().nestedExpression() / other; } @@ -408,21 +426,26 @@ template class TriangularViewImpl<_Mat EIGEN_DEVICE_FUNC TriangularViewType& operator=(const TriangularBase& other); + /** Shortcut for\code *this = other.other.triangularView<(*this)::Mode>() \endcode */ template EIGEN_DEVICE_FUNC TriangularViewType& operator=(const MatrixBase& other); +#ifndef EIGEN_PARSED_BY_DOXYGEN EIGEN_DEVICE_FUNC TriangularViewType& operator=(const TriangularViewImpl& other) { return *this = other.derived().nestedExpression(); } + /** \deprecated */ template EIGEN_DEVICE_FUNC void lazyAssign(const TriangularBase& other); + /** \deprecated */ template EIGEN_DEVICE_FUNC - void lazyAssign(const MatrixBase& other); + void lazyAssign(const MatrixBase& other); +#endif /** Efficient triangular matrix times vector/matrix product */ template @@ -442,11 +465,39 @@ template class TriangularViewImpl<_Mat return Product(lhs.derived(),rhs.derived()); } + /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular. + * + * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if + * \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if + * \a Side==OnTheRight. + * + * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the + * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this + * is an upper (resp. lower) triangular matrix. 
+ * + * Example: \include Triangular_solve.cpp + * Output: \verbinclude Triangular_solve.out + * + * This function returns an expression of the inverse-multiply and can works in-place if it is assigned + * to the same matrix or vector \a other. + * + * For users coming from BLAS, this function (and more specifically solveInPlace()) offer + * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines. + * + * \sa TriangularView::solveInPlace() + */ template EIGEN_DEVICE_FUNC inline const internal::triangular_solve_retval solve(const MatrixBase& other) const; + /** "in-place" version of TriangularView::solve() where the result is written in \a other + * + * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here. + * This function will const_cast it, so constness isn't honored here. + * + * See TriangularView:solve() for the details. + */ template EIGEN_DEVICE_FUNC void solveInPlace(const MatrixBase& other) const; @@ -456,15 +507,21 @@ template class TriangularViewImpl<_Mat void solveInPlace(const MatrixBase& other) const { return solveInPlace(other); } + /** Swaps the coefficients of the common triangular parts of two matrices */ template EIGEN_DEVICE_FUNC +#ifdef EIGEN_PARSED_BY_DOXYGEN + void swap(TriangularBase &other) +#else void swap(TriangularBase const & other) +#endif { EIGEN_STATIC_ASSERT_LVALUE(OtherDerived); call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); } - // TODO: this overload is ambiguous and it should be deprecated (Gael) + /** \deprecated + * Shortcut for \code (*this).swap(other.triangularView<(*this)::Mode>()) \endcode */ template EIGEN_DEVICE_FUNC void swap(MatrixBase const & other) diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 8fcde07d3..ddb1cf6f4 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -189,8 +189,8 @@ const unsigned int HereditaryBits = RowMajorBit */ /** 
\ingroup enums - * Enum containing possible values for the \p Mode parameter of - * MatrixBase::selfadjointView() and MatrixBase::triangularView(). */ + * Enum containing possible values for the \c Mode or \c UpLo parameter of + * MatrixBase::selfadjointView() and MatrixBase::triangularView(), and selfadjoint solvers. */ enum { /** View matrix as a lower triangular matrix. */ Lower=0x1, From 321cb56bf67c05fa94a518805e2f9be263902f28 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 13:29:39 +0200 Subject: [PATCH 084/344] Add unit test to check nesting of complex expressions in redux() --- test/redux.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/redux.cpp b/test/redux.cpp index 0d176e500..f3e7cc2a7 100644 --- a/test/redux.cpp +++ b/test/redux.cpp @@ -7,6 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define TEST_ENABLE_TEMPORARY_TRACKING + #include "main.h" template void matrixRedux(const MatrixType& m) @@ -57,6 +59,11 @@ template void matrixRedux(const MatrixType& m) // test empty objects VERIFY_IS_APPROX(m1.block(r0,c0,0,0).sum(), Scalar(0)); VERIFY_IS_APPROX(m1.block(r0,c0,0,0).prod(), Scalar(1)); + + // test nesting complex expression + VERIFY_EVALUATION_COUNT( (m1.matrix()*m1.matrix().transpose()).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); + VERIFY_EVALUATION_COUNT( ((m1.matrix()*m1.matrix().transpose())*Scalar(2)).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 
1 : 0) ); + } template void vectorRedux(const VectorType& w) From 78b8c344b521231e4953b385cc15532000d6e580 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 14:28:48 +0200 Subject: [PATCH 085/344] Add unit test for CoeffReadCost --- test/evaluators.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/test/evaluators.cpp b/test/evaluators.cpp index 12dc1ffef..876dffe22 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -94,6 +94,7 @@ namespace Eigen { } +template long get_cost(const XprType& ) { return Eigen::internal::evaluator::CoeffReadCost; } using namespace std; @@ -470,7 +471,6 @@ void test_evaluators() VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.triangularView(),A), MatrixXd(A.triangularView()*A)); VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.selfadjointView(),A), MatrixXd(A.selfadjointView()*A)); - } { @@ -481,6 +481,19 @@ void test_evaluators() VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(d.asDiagonal(),A), MatrixXd(d.asDiagonal()*A)); VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(A,d.asDiagonal()), MatrixXd(A*d.asDiagonal())); - + } + + { + // test CoeffReadCost + Matrix4d a, b; + VERIFY_IS_EQUAL( get_cost(a), 1 ); + VERIFY_IS_EQUAL( get_cost(a+b), 3); + VERIFY_IS_EQUAL( get_cost(2*a+b), 4); + VERIFY_IS_EQUAL( get_cost(a*b), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(b)), 15); + VERIFY_IS_EQUAL( get_cost(a*(a*b)), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a*b)), 15); + VERIFY_IS_EQUAL( get_cost(a*(a+b)), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a+b)), 15); } } From 515ecddb97da7f239747c8d547bfb9ed5d63ae0f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 14:29:46 +0200 Subject: [PATCH 086/344] Add unit test for nested_eval --- test/nesting_ops.cpp | 84 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp index 6e772c70f..15b231d2e 100644 --- a/test/nesting_ops.cpp +++ 
b/test/nesting_ops.cpp @@ -2,14 +2,35 @@ // for linear algebra. // // Copyright (C) 2010 Hauke Heibel +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define TEST_ENABLE_TEMPORARY_TRACKING + #include "main.h" -template void run_nesting_ops(const MatrixType& _m) +template +void use_n_times(const XprType &xpr) +{ + typename internal::nested_eval::type mat(xpr); + typename XprType::PlainObject res(mat.rows(), mat.cols()); + nb_temporaries--; // remove res + res.setZero(); + for(int i=0; i +bool verify_eval_type(const XprType &, const ReferenceType&) +{ + typedef typename internal::nested_eval::type EvalType; + return internal::is_same::type, typename internal::remove_all::type>::value; +} + +template void run_nesting_ops_1(const MatrixType& _m) { typename internal::nested_eval::type m(_m); @@ -24,10 +45,63 @@ template void run_nesting_ops(const MatrixType& _m) VERIFY_IS_APPROX( (m.transpose() * m).array().abs().sum(), (m.transpose() * m).array().abs().sum() ); } +template void run_nesting_ops_2(const MatrixType& _m) +{ + Index rows = _m.rows(); + Index cols = _m.cols(); + MatrixType m1 = MatrixType::Random(rows,cols); + + if((MatrixType::SizeAtCompileTime==Dynamic)) + { + + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1), 0 ); + if(!NumTraits::IsComplex) + { + VERIFY_EVALUATION_COUNT( use_n_times<3>(2*m1), 0 ); + VERIFY_EVALUATION_COUNT( use_n_times<4>(2*m1), 1 ); + } + else + { + VERIFY_EVALUATION_COUNT( use_n_times<1>(2*m1), 0 ); + VERIFY_EVALUATION_COUNT( use_n_times<2>(2*m1), 1 ); + } + VERIFY_EVALUATION_COUNT( use_n_times<2>(m1+m1), 0 ); + VERIFY_EVALUATION_COUNT( use_n_times<3>(m1+m1), 1 ); + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1*m1.transpose()), 1 ); + VERIFY_EVALUATION_COUNT( use_n_times<2>(m1*m1.transpose()), 1 ); + } + + { + VERIFY( verify_eval_type<10>(m1, m1) ); 
+ if(!NumTraits::IsComplex) + { + VERIFY( verify_eval_type<3>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<4>(2*m1, m1) ); + } + else + { + VERIFY( verify_eval_type<1>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<2>(2*m1, m1) ); + } + VERIFY( verify_eval_type<2>(m1+m1, m1+m1) ); + VERIFY( verify_eval_type<3>(m1+m1, m1) ); + VERIFY( verify_eval_type<1>(m1*m1.transpose(), m1) ); + VERIFY( verify_eval_type<1>(m1*(m1+m1).transpose(), m1) ); + VERIFY( verify_eval_type<2>(m1*m1.transpose(), m1) ); + } +} + + void test_nesting_ops() { - CALL_SUBTEST_1(run_nesting_ops(MatrixXf::Random(25,25))); - CALL_SUBTEST_2(run_nesting_ops(MatrixXd::Random(25,25))); - CALL_SUBTEST_3(run_nesting_ops(Matrix4f::Random())); - CALL_SUBTEST_4(run_nesting_ops(Matrix4d::Random())); + CALL_SUBTEST_1(run_nesting_ops_1(MatrixXf::Random(25,25))); + CALL_SUBTEST_2(run_nesting_ops_1(MatrixXcd::Random(25,25))); + CALL_SUBTEST_3(run_nesting_ops_1(Matrix4f::Random())); + CALL_SUBTEST_4(run_nesting_ops_1(Matrix2d::Random())); + + CALL_SUBTEST_1( run_nesting_ops_2(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_2( run_nesting_ops_2(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_3( run_nesting_ops_2(Matrix4f()) ); + CALL_SUBTEST_4( run_nesting_ops_2(Matrix2d()) ); + } From ae38910693d9ff96eb5be3ac6cd9a075fc8f7da9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 14:57:19 +0200 Subject: [PATCH 087/344] The evalautor of Solve was missing the EvalBeforeNestingBit flag. 
--- Eigen/src/Core/Solve.h | 2 ++ test/nesting_ops.cpp | 26 +++++++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index 4857a7c42..cb06028b1 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -118,6 +118,8 @@ struct evaluator > typedef Solve SolveType; typedef typename SolveType::PlainObject PlainObject; typedef evaluator Base; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve) : m_result(solve.rows(), solve.cols()) diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp index 15b231d2e..1e96075f8 100644 --- a/test/nesting_ops.cpp +++ b/test/nesting_ops.cpp @@ -47,6 +47,7 @@ template void run_nesting_ops_1(const MatrixType& _m) template void run_nesting_ops_2(const MatrixType& _m) { + typedef typename MatrixType::Scalar Scalar; Index rows = _m.rows(); Index cols = _m.cols(); MatrixType m1 = MatrixType::Random(rows,cols); @@ -55,7 +56,7 @@ template void run_nesting_ops_2(const MatrixType& _m) { VERIFY_EVALUATION_COUNT( use_n_times<10>(m1), 0 ); - if(!NumTraits::IsComplex) + if(!NumTraits::IsComplex) { VERIFY_EVALUATION_COUNT( use_n_times<3>(2*m1), 0 ); VERIFY_EVALUATION_COUNT( use_n_times<4>(2*m1), 1 ); @@ -69,11 +70,21 @@ template void run_nesting_ops_2(const MatrixType& _m) VERIFY_EVALUATION_COUNT( use_n_times<3>(m1+m1), 1 ); VERIFY_EVALUATION_COUNT( use_n_times<1>(m1*m1.transpose()), 1 ); VERIFY_EVALUATION_COUNT( use_n_times<2>(m1*m1.transpose()), 1 ); + + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1 + m1*m1), 2 ); // FIXME should already be 1 thanks the already existing rule + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1 + m1*m1), 2 ); + + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1.template triangularView().solve(m1.col(0))), 1 ); + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1.template triangularView().solve(m1.col(0))), 1 ); + + VERIFY_EVALUATION_COUNT( use_n_times<1>(Scalar(2)*m1.template 
triangularView().solve(m1.col(0))), 2 ); // FIXME could be one by applying the scaling in-place on the solve result + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1.col(0)+m1.template triangularView().solve(m1.col(0))), 2 ); // FIXME could be one by adding m1.col() inplace + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1.col(0)+m1.template triangularView().solve(m1.col(0))), 2 ); } { VERIFY( verify_eval_type<10>(m1, m1) ); - if(!NumTraits::IsComplex) + if(!NumTraits::IsComplex) { VERIFY( verify_eval_type<3>(2*m1, 2*m1) ); VERIFY( verify_eval_type<4>(2*m1, m1) ); @@ -88,6 +99,10 @@ template void run_nesting_ops_2(const MatrixType& _m) VERIFY( verify_eval_type<1>(m1*m1.transpose(), m1) ); VERIFY( verify_eval_type<1>(m1*(m1+m1).transpose(), m1) ); VERIFY( verify_eval_type<2>(m1*m1.transpose(), m1) ); + VERIFY( verify_eval_type<1>(m1+m1*m1, m1) ); + + VERIFY( verify_eval_type<1>(m1.template triangularView().solve(m1), m1) ); + VERIFY( verify_eval_type<1>(m1+m1.template triangularView().solve(m1), m1) ); } } @@ -99,9 +114,10 @@ void test_nesting_ops() CALL_SUBTEST_3(run_nesting_ops_1(Matrix4f::Random())); CALL_SUBTEST_4(run_nesting_ops_1(Matrix2d::Random())); - CALL_SUBTEST_1( run_nesting_ops_2(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_2( run_nesting_ops_2(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + Index s = internal::random(1,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_1( run_nesting_ops_2(MatrixXf(s,s)) ); + CALL_SUBTEST_2( run_nesting_ops_2(MatrixXcd(s,s)) ); CALL_SUBTEST_3( run_nesting_ops_2(Matrix4f()) ); CALL_SUBTEST_4( run_nesting_ops_2(Matrix2d()) ); - + TEST_SET_BUT_UNUSED_VARIABLE(s) } From a4cc4c1e5e0ca1ee46818cda81793e4b5e586221 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 14:57:51 +0200 Subject: [PATCH 088/344] Clarify note in nested_eval for evaluator creating temporaries. 
--- Eigen/src/Core/util/XprHelper.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 052f824ae..c85a6be80 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -396,9 +396,11 @@ template::type> struc DynamicAsInteger = 10000, ScalarReadCost = NumTraits::Scalar>::ReadCost, ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost), - CoeffReadCost = evaluator::CoeffReadCost, // TODO What if an evaluator evaluate itself into a tempory? - // Then CoeffReadCost will be small but we still have to evaluate if n>1... - // The solution might be to ask the evaluator if it creates a temp. Perhaps we could even ask the number of temps? + CoeffReadCost = evaluator::CoeffReadCost, // NOTE What if an evaluator evaluate itself into a tempory? + // Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1. + // This situation is already taken care by the EvalBeforeNestingBit flag, which is turned ON + // for all evaluator creating a temporary. This flag is then propagated by the parent evaluators. + // Another solution could be to count the number of temps? CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost), NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n, CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger, From 6536b4bad71de912f240c6fffa6fd29e5b7b4bbc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 9 Oct 2015 15:28:09 +0200 Subject: [PATCH 089/344] Implement temporary-free path for "D.nolias() ?= C + A*B". 
(I thought it was already implemented) --- Eigen/src/Core/ProductEvaluators.h | 31 ++++++++++++++++++++++++++++++ test/nesting_ops.cpp | 21 ++------------------ test/product.h | 9 +++++++++ test/product_notemporary.cpp | 4 ++++ test/redux.cpp | 1 + 5 files changed, 47 insertions(+), 19 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 7099d1fdc..eebd1a9d4 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -187,6 +187,37 @@ struct Assignment" expression to save one temporary +// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct + +template +struct assignment_from_xpr_plus_product +{ + typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const Func1& func) + { + call_assignment_no_alias(dst, src.lhs(), func); + call_assignment_no_alias(dst, src.rhs(), Func2()); + } +}; + +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, Scalar, internal::assign_op, internal::add_assign_op > +{}; +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::add_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, Scalar, internal::add_assign_op, internal::add_assign_op > +{}; +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::sub_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, Scalar, internal::sub_assign_op, internal::sub_assign_op > +{}; +//---------------------------------------- template struct generic_product_impl diff --git a/test/nesting_ops.cpp 
b/test/nesting_ops.cpp index 1e96075f8..76a63400c 100644 --- a/test/nesting_ops.cpp +++ b/test/nesting_ops.cpp @@ -54,25 +54,8 @@ template void run_nesting_ops_2(const MatrixType& _m) if((MatrixType::SizeAtCompileTime==Dynamic)) { - - VERIFY_EVALUATION_COUNT( use_n_times<10>(m1), 0 ); - if(!NumTraits::IsComplex) - { - VERIFY_EVALUATION_COUNT( use_n_times<3>(2*m1), 0 ); - VERIFY_EVALUATION_COUNT( use_n_times<4>(2*m1), 1 ); - } - else - { - VERIFY_EVALUATION_COUNT( use_n_times<1>(2*m1), 0 ); - VERIFY_EVALUATION_COUNT( use_n_times<2>(2*m1), 1 ); - } - VERIFY_EVALUATION_COUNT( use_n_times<2>(m1+m1), 0 ); - VERIFY_EVALUATION_COUNT( use_n_times<3>(m1+m1), 1 ); - VERIFY_EVALUATION_COUNT( use_n_times<1>(m1*m1.transpose()), 1 ); - VERIFY_EVALUATION_COUNT( use_n_times<2>(m1*m1.transpose()), 1 ); - - VERIFY_EVALUATION_COUNT( use_n_times<1>(m1 + m1*m1), 2 ); // FIXME should already be 1 thanks the already existing rule - VERIFY_EVALUATION_COUNT( use_n_times<10>(m1 + m1*m1), 2 ); + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1 + m1*m1), 1 ); + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1 + m1*m1), 1 ); VERIFY_EVALUATION_COUNT( use_n_times<1>(m1.template triangularView().solve(m1.col(0))), 1 ); VERIFY_EVALUATION_COUNT( use_n_times<10>(m1.template triangularView().solve(m1.col(0))), 1 ); diff --git a/test/product.h b/test/product.h index 672d0cee9..9dfff9303 100644 --- a/test/product.h +++ b/test/product.h @@ -111,6 +111,15 @@ template void product(const MatrixType& m) vcres.noalias() -= m1.transpose() * v1; VERIFY_IS_APPROX(vcres, vc2 - m1.transpose() * v1); + // test d ?= a+b*c rules + res.noalias() = square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + res.noalias() += square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, 2*(square + m1 * m2.transpose())); + res.noalias() -= square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + + tm1 = m1; VERIFY_IS_APPROX(tm1.transpose() * v1, m1.transpose() * v1); 
VERIFY_IS_APPROX(v1.transpose() * tm1, v1.transpose() * m1); diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp index 16f6266f7..ff93cb881 100644 --- a/test/product_notemporary.cpp +++ b/test/product_notemporary.cpp @@ -47,6 +47,10 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * (m1 * m2.transpose()), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() += m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * m2.adjoint(), 0); VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * (m1*s3+m2*s2).adjoint(), 1); VERIFY_EVALUATION_COUNT( m3.noalias() = (s1 * m1).adjoint() * s2 * m2, 0); diff --git a/test/redux.cpp b/test/redux.cpp index f3e7cc2a7..9b0767c73 100644 --- a/test/redux.cpp +++ b/test/redux.cpp @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2008 Benoit Jacob +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed From 6163db814ca35aa452db962861ef04cdc6ca41ef Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 10 Oct 2015 22:38:55 +0200 Subject: [PATCH 090/344] bug #1085: workaround gcc default ABI issue --- Eigen/src/Core/arch/SSE/PacketMath.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 2e0a807bf..3c30b2cf0 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -545,11 +545,11 @@ template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { retu template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) { Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a)); - return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); + return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); } template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { - return pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a))); + return pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a))); } template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) @@ -581,7 +581,7 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) { Packet4i tmp0 = _mm_hadd_epi32(a,a); - return pfirst(_mm_hadd_epi32(tmp0,tmp0)); + return pfirst(_mm_hadd_epi32(tmp0,tmp0)); } #else template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) From 63e29e7765825f02ee7a1d6d24a82b2beb4bb924 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 11 Oct 2015 22:47:28 +0200 Subject: [PATCH 091/344] Workaround ICC issue with first_aligned --- Eigen/src/Core/DenseCoeffsBase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index 9581757f3..339c0986b 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -617,7 +617,7 @@ static inline Index 
first_default_aligned(const DenseBase& m) { typedef typename Derived::Scalar Scalar; typedef typename packet_traits::type DefaultPacketType; - return first_aligned::alignment>(m); + return internal::first_aligned::alignment),Derived>(m); } template::ret> From 6407e367ee20b626355fe4c5567d1941d1b3fc55 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 12 Oct 2015 09:49:05 +0200 Subject: [PATCH 092/344] Add missing epxlicit keyword, and fix regression in DynamicSparseMatrix --- Eigen/src/SparseCore/SparseCompressedBase.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index 97ca44761..c8a2705f9 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -120,12 +120,18 @@ class SparseCompressedBase::InnerIterator } } - InnerIterator(const SparseCompressedBase& mat) + explicit InnerIterator(const SparseCompressedBase& mat) : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(0), m_id(0), m_end(mat.nonZeros()) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); } + explicit InnerIterator(const internal::CompressedStorage& data) + : m_values(&data.value(0)), m_indices(&data.index(0)), m_outer(0), m_id(0), m_end(data.size()) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + } + inline InnerIterator& operator++() { m_id++; return *this; } inline const Scalar& value() const { return m_values[m_id]; } @@ -173,9 +179,17 @@ class SparseCompressedBase::ReverseInnerIterator } } - ReverseInnerIterator(const SparseCompressedBase& mat) + explicit ReverseInnerIterator(const SparseCompressedBase& mat) : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(0), m_start(0), m_id(mat.nonZeros()) - {} + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + } + + explicit ReverseInnerIterator(const internal::CompressedStorage& data) + : m_values(&data.value(0)), m_indices(&data.index(0)), 
m_outer(0), m_start(0), m_id(data.size()) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + } inline ReverseInnerIterator& operator--() { --m_id; return *this; } From 252e89b11b1107da83259d55f4831977f19d6dbc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 12 Oct 2015 16:20:12 +0200 Subject: [PATCH 093/344] bug #1086: replace deprecated UF_long by SuiteSparse_long --- Eigen/src/CholmodSupport/CholmodSupport.h | 2 +- Eigen/src/SPQRSupport/SuiteSparseQRSupport.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index 3ff0c6fc9..f33aa9bf1 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -78,7 +78,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat) { res.itype = CHOLMOD_INT; } - else if (internal::is_same<_StorageIndex,UF_long>::value) + else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value) { res.itype = CHOLMOD_LONG; } diff --git a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h index ac2de9b04..d2f053fa5 100644 --- a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +++ b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h @@ -48,7 +48,7 @@ namespace Eigen { * You can then apply it to a vector. * * R is the sparse triangular factor. Use matrixQR() to get it as SparseMatrix. - * NOTE : The Index type of R is always UF_long. You can get it with SPQR::Index + * NOTE : The Index type of R is always SuiteSparse_long. 
You can get it with SPQR::Index * * \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<> * @@ -65,7 +65,7 @@ class SPQR : public SparseSolverBase > public: typedef typename _MatrixType::Scalar Scalar; typedef typename _MatrixType::RealScalar RealScalar; - typedef UF_long StorageIndex ; + typedef SuiteSparse_long StorageIndex ; typedef SparseMatrix MatrixType; typedef Map > PermutationType; public: From ea9749fd6c9c585e573a4f33dd45943d69030e09 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 13 Oct 2015 09:53:46 +0200 Subject: [PATCH 094/344] Fix packetmath unit test for pdiv not being always defined --- test/packetmath.cpp | 114 +++++++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/test/packetmath.cpp b/test/packetmath.cpp index b2b1cadc9..568058f1a 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -49,14 +49,6 @@ template bool areApprox(const Scalar* a, const Scalar* b, int s return true; } - -#define CHECK_CWISE2(REFOP, POP) { \ - for (int i=0; i(data1), internal::pload(data1+PacketSize))); \ - VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ -} - #define CHECK_CWISE1(REFOP, POP) { \ for (int i=0; i VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ } +#define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \ + packet_helper h; \ + for (int i=0; i template void packetmath() { using std::abs; - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; typedef typename NumTraits::Real RealScalar; const int max_size = PacketSize > 4 ? 
PacketSize : 4; @@ -153,13 +154,17 @@ template void packetmath() VERIFY(areApprox(ref, data2, PacketSize) && "internal::palign"); } - CHECK_CWISE2(REF_ADD, internal::padd); - CHECK_CWISE2(REF_SUB, internal::psub); - CHECK_CWISE2(REF_MUL, internal::pmul); - #if !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) - if (!internal::is_same::value) - CHECK_CWISE2(REF_DIV, internal::pdiv); - #endif + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasNegate); + VERIFY((internal::is_same::value) || (!PacketTraits::Vectorizable) || PacketTraits::HasDiv); + + CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD, internal::padd); + CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB, internal::psub); + CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul); + CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv); + CHECK_CWISE1(internal::negate, internal::pnegate); CHECK_CWISE1(numext::conj, internal::pconj); @@ -262,7 +267,7 @@ template void packetmath() } } - if (internal::packet_traits::HasBlend) { + if (PacketTraits::HasBlend) { Packet thenPacket = internal::pload(data1); Packet elsePacket = internal::pload(data2); EIGEN_ALIGN_MAX internal::Selector selector; @@ -282,42 +287,43 @@ template void packetmath() template void packetmath_real() { using std::abs; - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; const int size = PacketSize*4; - EIGEN_ALIGN_MAX Scalar data1[internal::packet_traits::size*4]; - EIGEN_ALIGN_MAX Scalar data2[internal::packet_traits::size*4]; - EIGEN_ALIGN_MAX Scalar ref[internal::packet_traits::size*4]; + EIGEN_ALIGN_MAX Scalar 
data1[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4]; for (int i=0; i(-1,1) * std::pow(Scalar(10), internal::random(-3,3)); data2[i] = internal::random(-1,1) * std::pow(Scalar(10), internal::random(-3,3)); } - CHECK_CWISE1_IF(internal::packet_traits::HasSin, std::sin, internal::psin); - CHECK_CWISE1_IF(internal::packet_traits::HasCos, std::cos, internal::pcos); - CHECK_CWISE1_IF(internal::packet_traits::HasTan, std::tan, internal::ptan); + CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin); + CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos); + CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan); for (int i=0; i(-1,1); data2[i] = internal::random(-1,1); } - CHECK_CWISE1_IF(internal::packet_traits::HasASin, std::asin, internal::pasin); - CHECK_CWISE1_IF(internal::packet_traits::HasACos, std::acos, internal::pacos); + CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin); + CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos); for (int i=0; i(-87,88); data2[i] = internal::random(-87,88); } - CHECK_CWISE1_IF(internal::packet_traits::HasExp, std::exp, internal::pexp); - if(internal::packet_traits::HasExp && internal::packet_traits::size>=2) + CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp); + if(PacketTraits::HasExp && PacketTraits::size>=2) { data1[0] = std::numeric_limits::quiet_NaN(); data1[1] = std::numeric_limits::epsilon(); - packet_helper::HasExp,Packet> h; + packet_helper h; h.store(data2, internal::pexp(h.load(data1))); VERIFY((numext::isnan)(data2[0])); VERIFY_IS_EQUAL(std::exp(std::numeric_limits::epsilon()), data2[1]); @@ -348,13 +354,13 @@ template void packetmath_real() } if(internal::random(0,1)<0.1) data1[internal::random(0, PacketSize)] = 0; - CHECK_CWISE1_IF(internal::packet_traits::HasSqrt, std::sqrt, internal::psqrt); - CHECK_CWISE1_IF(internal::packet_traits::HasLog, std::log, 
internal::plog); - if(internal::packet_traits::HasLog && internal::packet_traits::size>=2) + CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); + CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); + if(PacketTraits::HasLog && PacketTraits::size>=2) { data1[0] = std::numeric_limits::quiet_NaN(); data1[1] = std::numeric_limits::epsilon(); - packet_helper::HasLog,Packet> h; + packet_helper h; h.store(data2, internal::plog(h.load(data1))); VERIFY((numext::isnan)(data2[0])); VERIFY_IS_EQUAL(std::log(std::numeric_limits::epsilon()), data2[1]); @@ -391,22 +397,26 @@ template void packetmath_real() template void packetmath_notcomplex() { using std::abs; - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; - EIGEN_ALIGN_MAX Scalar data1[internal::packet_traits::size*4]; - EIGEN_ALIGN_MAX Scalar data2[internal::packet_traits::size*4]; - EIGEN_ALIGN_MAX Scalar ref[internal::packet_traits::size*4]; + EIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4]; - Array::Map(data1, internal::packet_traits::size*4).setRandom(); + Array::Map(data1, PacketTraits::size*4).setRandom(); ref[0] = data1[0]; for (int i=0; i(data1))) && "internal::predux_min"); - CHECK_CWISE2((std::min), internal::pmin); - CHECK_CWISE2((std::max), internal::pmax); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax); + + CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin); + CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax); CHECK_CWISE1(abs, internal::pabs); ref[0] = data1[0]; @@ -422,8 +432,9 @@ template void packetmath_notcomplex() template void test_conj_helper(Scalar* data1, 
Scalar* data2, Scalar* ref, Scalar* pval) { - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; internal::conj_if cj0; internal::conj_if cj1; @@ -450,8 +461,9 @@ template void test_conj_helper(Scalar template void packetmath_complex() { - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; const int size = PacketSize*4; EIGEN_ALIGN_MAX Scalar data1[PacketSize*4]; @@ -478,10 +490,12 @@ template void packetmath_complex() } } -template void packetmath_scatter_gather() { - typedef typename internal::packet_traits::type Packet; +template void packetmath_scatter_gather() +{ + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; typedef typename NumTraits::Real RealScalar; - const int PacketSize = internal::packet_traits::size; + const int PacketSize = PacketTraits::size; EIGEN_ALIGN_MAX Scalar data1[PacketSize]; RealScalar refvalue = 0; for (int i=0; i Date: Tue, 13 Oct 2015 09:58:54 +0200 Subject: [PATCH 095/344] update mpreal.h --- unsupported/test/mpreal/mpreal.h | 855 ++++++++++++++++--------------- 1 file changed, 444 insertions(+), 411 deletions(-) diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h index 104cb686f..f896515aa 100644 --- a/unsupported/test/mpreal/mpreal.h +++ b/unsupported/test/mpreal/mpreal.h @@ -1,33 +1,34 @@ /* - MPFR C++: Multi-precision floating point number class for C++. + MPFR C++: Multi-precision floating point number class for C++. 
Based on MPFR library: http://mpfr.org Project homepage: http://www.holoborodko.com/pavel/mpfr Contact e-mail: pavel@holoborodko.com - Copyright (c) 2008-2014 Pavel Holoborodko + Copyright (c) 2008-2015 Pavel Holoborodko Contributors: - Dmitriy Gubanov, Konstantin Holoborodko, Brian Gladman, - Helmut Jarausch, Fokko Beekhof, Ulrich Mutze, Heinz van Saanen, - Pere Constans, Peter van Hoof, Gael Guennebaud, Tsai Chia Cheng, + Dmitriy Gubanov, Konstantin Holoborodko, Brian Gladman, + Helmut Jarausch, Fokko Beekhof, Ulrich Mutze, Heinz van Saanen, + Pere Constans, Peter van Hoof, Gael Guennebaud, Tsai Chia Cheng, Alexei Zubanov, Jauhien Piatlicki, Victor Berger, John Westwood, - Petr Aleksandrov, Orion Poplawski, Charles Karney. + Petr Aleksandrov, Orion Poplawski, Charles Karney, Arash Partow, + Rodney James, Jorge Leitao. Licensing: (A) MPFR C++ is under GNU General Public License ("GPL"). - - (B) Non-free licenses may also be purchased from the author, for users who + + (B) Non-free licenses may also be purchased from the author, for users who do not want their programs protected by the GPL. - The non-free licenses are for users that wish to use MPFR C++ in - their products but are unwilling to release their software - under the GPL (which would require them to release source code + The non-free licenses are for users that wish to use MPFR C++ in + their products but are unwilling to release their software + under the GPL (which would require them to release source code and allow free redistribution). Such users can purchase an unlimited-use license from the author. Contact us for more details. 
- + GNU General Public License ("GPL") copyright permissions statement: ************************************************************************** This program is free software: you can redistribute it and/or modify @@ -55,10 +56,11 @@ #include #include #include +#include +#include +#include // Options -// FIXME HAVE_INT64_SUPPORT leads to clashes with long int and int64_t on some systems. -//#define MPREAL_HAVE_INT64_SUPPORT // Enable int64_t support if possible. Available only for MSVC 2010 & GCC. #define MPREAL_HAVE_MSVC_DEBUGVIEW // Enable Debugger Visualizer for "Debug" builds in MSVC. #define MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS // Enable extended std::numeric_limits specialization. // Meaning that "digits", "round_style" and similar members are defined as functions, not constants. @@ -66,19 +68,19 @@ // Library version #define MPREAL_VERSION_MAJOR 3 -#define MPREAL_VERSION_MINOR 5 -#define MPREAL_VERSION_PATCHLEVEL 9 -#define MPREAL_VERSION_STRING "3.5.9" +#define MPREAL_VERSION_MINOR 6 +#define MPREAL_VERSION_PATCHLEVEL 2 +#define MPREAL_VERSION_STRING "3.6.2" // Detect compiler using signatures from http://predef.sourceforge.net/ #if defined(__GNUC__) && defined(__INTEL_COMPILER) - #define IsInf(x) (isinf)(x) // Intel ICC compiler on Linux + #define IsInf(x) isinf(x) // Intel ICC compiler on Linux -#elif defined(_MSC_VER) // Microsoft Visual C++ - #define IsInf(x) (!_finite(x)) +#elif defined(_MSC_VER) // Microsoft Visual C++ + #define IsInf(x) (!_finite(x)) #else - #define IsInf(x) (std::isinf)(x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance + #define IsInf(x) std::isinf(x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance #endif // A Clang feature extension to determine compiler features. 
@@ -93,54 +95,27 @@ #define MPREAL_HAVE_MOVE_SUPPORT - // Use fields in mpfr_t structure to check if it was initialized / set dummy initialization + // Use fields in mpfr_t structure to check if it was initialized / set dummy initialization #define mpfr_is_initialized(x) (0 != (x)->_mpfr_d) #define mpfr_set_uninitialized(x) ((x)->_mpfr_d = 0 ) #endif -// Detect support for explicit converters. +// Detect support for explicit converters. #if (__has_feature(cxx_explicit_conversions) || \ - defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \ - (defined(_MSC_VER) && _MSC_VER >= 1800)) + (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GNUC_MINOR >= 5) || __cplusplus >= 201103L || \ + (defined(_MSC_VER) && _MSC_VER >= 1800)) #define MPREAL_HAVE_EXPLICIT_CONVERTERS #endif -// Detect available 64-bit capabilities -#if defined(MPREAL_HAVE_INT64_SUPPORT) - - #define MPFR_USE_INTMAX_T // Should be defined before mpfr.h - - #if defined(_MSC_VER) // MSVC + Windows - #if (_MSC_VER >= 1600) - #include // is available only in msvc2010! - - #else // MPFR relies on intmax_t which is available only in msvc2010 - #undef MPREAL_HAVE_INT64_SUPPORT // Besides, MPFR & MPIR have to be compiled with msvc2010 - #undef MPFR_USE_INTMAX_T // Since we cannot detect this, disable x64 by default - // Someone should change this manually if needed. - #endif - - #elif defined (__GNUC__) && defined(__linux__) - #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) || defined (__PPC64__) - #undef MPREAL_HAVE_INT64_SUPPORT // Remove all shaman dances for x64 builds since - #undef MPFR_USE_INTMAX_T // GCC already supports x64 as of "long int" is 64-bit integer, nothing left to do - #else - #include // use int64_t, uint64_t otherwise - #endif - - #else - #include // rely on int64_t, uint64_t in all other cases, Mac OSX, etc. 
- #endif - -#endif +#define MPFR_USE_INTMAX_T // Enable 64-bit integer types - should be defined before mpfr.h #if defined(MPREAL_HAVE_MSVC_DEBUGVIEW) && defined(_MSC_VER) && defined(_DEBUG) #define MPREAL_MSVC_DEBUGVIEW_CODE DebugView = toString(); #define MPREAL_MSVC_DEBUGVIEW_DATA std::string DebugView; #else - #define MPREAL_MSVC_DEBUGVIEW_CODE - #define MPREAL_MSVC_DEBUGVIEW_DATA + #define MPREAL_MSVC_DEBUGVIEW_CODE + #define MPREAL_MSVC_DEBUGVIEW_DATA #endif #include @@ -150,9 +125,15 @@ #endif // Less important options -#define MPREAL_DOUBLE_BITS_OVERFLOW -1 // Triggers overflow exception during conversion to double if mpreal +#define MPREAL_DOUBLE_BITS_OVERFLOW -1 // Triggers overflow exception during conversion to double if mpreal // cannot fit in MPREAL_DOUBLE_BITS_OVERFLOW bits // = -1 disables overflow checks (default) + +// Fast replacement for mpfr_set_zero(x, +1): +// (a) uses low-level data members, might not be compatible with new versions of MPFR +// (b) sign is not set, add (x)->_mpfr_sign = 1; +#define mpfr_set_zero_fast(x) ((x)->_mpfr_exp = __MPFR_EXP_ZERO) + #if defined(__GNUC__) #define MPREAL_PERMISSIVE_EXPR __extension__ #else @@ -164,9 +145,9 @@ namespace mpfr { class mpreal { private: mpfr_t mp; - + public: - + // Get default rounding mode & precision inline static mp_rnd_t get_default_rnd() { return (mp_rnd_t)(mpfr_get_default_rounding_mode()); } inline static mp_prec_t get_default_prec() { return mpfr_get_default_prec(); } @@ -174,29 +155,26 @@ public: // Constructors && type conversions mpreal(); mpreal(const mpreal& u); - mpreal(const mpf_t u); - mpreal(const mpz_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const mpq_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const long double u, mp_prec_t prec = 
mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const unsigned long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const unsigned int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - - // Construct mpreal from mpfr_t structure. - // shared = true allows to avoid deep copy, so that mpreal and 'u' share the same data & pointers. - mpreal(const mpfr_t u, bool shared = false); + mpreal(const mpf_t u); + mpreal(const mpz_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); + mpreal(const mpq_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); + mpreal(const double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); + mpreal(const long double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); + mpreal(const unsigned long long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); + mpreal(const long long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); + mpreal(const unsigned long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); + mpreal(const unsigned int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); + mpreal(const long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); + mpreal(const int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); -#if defined (MPREAL_HAVE_INT64_SUPPORT) - 
mpreal(const uint64_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const int64_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); -#endif + // Construct mpreal from mpfr_t structure. + // shared = true allows to avoid deep copy, so that mpreal and 'u' share the same data & pointers. + mpreal(const mpfr_t u, bool shared = false); mpreal(const char* s, mp_prec_t prec = mpreal::get_default_prec(), int base = 10, mp_rnd_t mode = mpreal::get_default_rnd()); mpreal(const std::string& s, mp_prec_t prec = mpreal::get_default_prec(), int base = 10, mp_rnd_t mode = mpreal::get_default_rnd()); - ~mpreal(); + ~mpreal(); #ifdef MPREAL_HAVE_MOVE_SUPPORT mpreal& operator=(mpreal&& v); @@ -205,7 +183,7 @@ public: // Operations // = - // +, -, *, /, ++, --, <<, >> + // +, -, *, /, ++, --, <<, >> // *=, +=, -=, /=, // <, >, ==, <=, >= @@ -215,13 +193,16 @@ public: mpreal& operator=(const mpz_t v); mpreal& operator=(const mpq_t v); mpreal& operator=(const long double v); - mpreal& operator=(const double v); + mpreal& operator=(const double v); mpreal& operator=(const unsigned long int v); + mpreal& operator=(const unsigned long long int v); + mpreal& operator=(const long long int v); mpreal& operator=(const unsigned int v); mpreal& operator=(const long int v); mpreal& operator=(const int v); mpreal& operator=(const char* s); mpreal& operator=(const std::string& s); + template mpreal& operator= (const std::complex& z); // + mpreal& operator+=(const mpreal& v); @@ -235,20 +216,18 @@ public: mpreal& operator+=(const long int u); mpreal& operator+=(const int u); -#if defined (MPREAL_HAVE_INT64_SUPPORT) - mpreal& operator+=(const int64_t u); - mpreal& operator+=(const uint64_t u); - mpreal& operator-=(const int64_t u); - mpreal& operator-=(const uint64_t u); - mpreal& operator*=(const int64_t u); - mpreal& operator*=(const uint64_t u); - mpreal& operator/=(const int64_t u); - mpreal& 
operator/=(const uint64_t u); -#endif + mpreal& operator+=(const long long int u); + mpreal& operator+=(const unsigned long long int u); + mpreal& operator-=(const long long int u); + mpreal& operator-=(const unsigned long long int u); + mpreal& operator*=(const long long int u); + mpreal& operator*=(const unsigned long long int u); + mpreal& operator/=(const long long int u); + mpreal& operator/=(const unsigned long long int u); const mpreal operator+() const; mpreal& operator++ (); - const mpreal operator++ (int); + const mpreal operator++ (int); // - mpreal& operator-=(const mpreal& v); @@ -266,7 +245,7 @@ public: friend const mpreal operator-(const long int b, const mpreal& a); friend const mpreal operator-(const int b, const mpreal& a); friend const mpreal operator-(const double b, const mpreal& a); - mpreal& operator-- (); + mpreal& operator-- (); const mpreal operator-- (int); // * @@ -279,7 +258,7 @@ public: mpreal& operator*=(const unsigned int v); mpreal& operator*=(const long int v); mpreal& operator*=(const int v); - + // / mpreal& operator/=(const mpreal& v); mpreal& operator/=(const mpz_t v); @@ -308,51 +287,27 @@ public: mpreal& operator>>=(const long int u); mpreal& operator>>=(const int u); - // Boolean Operators - friend bool operator > (const mpreal& a, const mpreal& b); - friend bool operator >= (const mpreal& a, const mpreal& b); - friend bool operator < (const mpreal& a, const mpreal& b); - friend bool operator <= (const mpreal& a, const mpreal& b); - friend bool operator == (const mpreal& a, const mpreal& b); - friend bool operator != (const mpreal& a, const mpreal& b); - - // Optimized specializations for boolean operators - friend bool operator == (const mpreal& a, const unsigned long int b); - friend bool operator == (const mpreal& a, const unsigned int b); - friend bool operator == (const mpreal& a, const long int b); - friend bool operator == (const mpreal& a, const int b); - friend bool operator == (const mpreal& a, const long double 
b); - friend bool operator == (const mpreal& a, const double b); - // Type Conversion operators - bool toBool (mp_rnd_t mode = GMP_RNDZ) const; - long toLong (mp_rnd_t mode = GMP_RNDZ) const; - unsigned long toULong (mp_rnd_t mode = GMP_RNDZ) const; - float toFloat (mp_rnd_t mode = GMP_RNDN) const; - double toDouble (mp_rnd_t mode = GMP_RNDN) const; - long double toLDouble (mp_rnd_t mode = GMP_RNDN) const; + bool toBool ( ) const; + long toLong (mp_rnd_t mode = GMP_RNDZ) const; + unsigned long toULong (mp_rnd_t mode = GMP_RNDZ) const; + long long toLLong (mp_rnd_t mode = GMP_RNDZ) const; + unsigned long long toULLong (mp_rnd_t mode = GMP_RNDZ) const; + float toFloat (mp_rnd_t mode = GMP_RNDN) const; + double toDouble (mp_rnd_t mode = GMP_RNDN) const; + long double toLDouble (mp_rnd_t mode = GMP_RNDN) const; #if defined (MPREAL_HAVE_EXPLICIT_CONVERTERS) - explicit operator bool () const { return toBool(); } - explicit operator int () const { return int(toLong()); } - explicit operator long () const { return toLong(); } - explicit operator long long () const { return toLong(); } - explicit operator unsigned () const { return unsigned(toULong()); } - explicit operator unsigned long () const { return toULong(); } - explicit operator unsigned long long () const { return toULong(); } - explicit operator float () const { return toFloat(); } - explicit operator double () const { return toDouble(); } - explicit operator long double () const { return toLDouble(); } -#endif - -#if defined (MPREAL_HAVE_INT64_SUPPORT) - int64_t toInt64 (mp_rnd_t mode = GMP_RNDZ) const; - uint64_t toUInt64 (mp_rnd_t mode = GMP_RNDZ) const; - - #if defined (MPREAL_HAVE_EXPLICIT_CONVERTERS) - explicit operator int64_t () const { return toInt64(); } - explicit operator uint64_t () const { return toUInt64(); } - #endif + explicit operator bool () const { return toBool(); } + explicit operator int () const { return int(toLong()); } + explicit operator long () const { return toLong(); } + explicit 
operator long long () const { return toLLong(); } + explicit operator unsigned () const { return unsigned(toULong()); } + explicit operator unsigned long () const { return toULong(); } + explicit operator unsigned long long () const { return toULLong(); } + explicit operator float () const { return toFloat(); } + explicit operator double () const { return toDouble(); } + explicit operator long double () const { return toLDouble(); } #endif // Get raw pointers so that mpreal can be directly used in raw mpfr_* functions @@ -391,11 +346,12 @@ public: friend inline const mpreal div_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode); friend inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode); friend int cmpabs(const mpreal& a,const mpreal& b); - + friend const mpreal log (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal log2 (const mpreal& v, mp_rnd_t rnd_mode); + friend const mpreal logb (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal log10(const mpreal& v, mp_rnd_t rnd_mode); - friend const mpreal exp (const mpreal& v, mp_rnd_t rnd_mode); + friend const mpreal exp (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal exp2 (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal exp10(const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal log1p(const mpreal& v, mp_rnd_t rnd_mode); @@ -436,21 +392,22 @@ public: friend const mpreal eint (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal gamma (const mpreal& v, mp_rnd_t rnd_mode); + friend const mpreal tgamma (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal lngamma (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal lgamma (const mpreal& v, int *signp, mp_rnd_t rnd_mode); friend const mpreal zeta (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal erf (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal erfc (const mpreal& v, mp_rnd_t rnd_mode); - friend const mpreal besselj0 (const mpreal& v, mp_rnd_t rnd_mode); - friend const 
mpreal besselj1 (const mpreal& v, mp_rnd_t rnd_mode); + friend const mpreal besselj0 (const mpreal& v, mp_rnd_t rnd_mode); + friend const mpreal besselj1 (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal besseljn (long n, const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal bessely0 (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal bessely1 (const mpreal& v, mp_rnd_t rnd_mode); - friend const mpreal besselyn (long n, const mpreal& v, mp_rnd_t rnd_mode); + friend const mpreal besselyn (long n, const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode); friend const mpreal fms (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode); friend const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode); - friend const mpreal sum (const mpreal tab[], unsigned long int n, mp_rnd_t rnd_mode); + friend const mpreal sum (const mpreal tab[], const unsigned long int n, int& status, mp_rnd_t rnd_mode); friend int sgn(const mpreal& v); // returns -1 or +1 // MPFR 2.4.0 Specifics @@ -465,28 +422,26 @@ public: friend const mpreal mod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode); // Modulus after division #endif -// MPFR 3.0.0 Specifics #if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) friend const mpreal digamma (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal ai (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal urandom (gmp_randstate_t& state, mp_rnd_t rnd_mode); // use gmp_randinit_default() to init state, gmp_randclear() to clear +#endif + +#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0)) friend const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode); // use gmp_randinit_default() to init state, gmp_randclear() to clear friend const mpreal grandom (unsigned int seed); #endif - + // Uniformly distributed random number generation in [0,1] using // Mersenne-Twister algorithm by default. 
// Use parameter to setup seed, e.g.: random((unsigned)time(NULL)) // Check urandom() for more precise control. friend const mpreal random(unsigned int seed); - // Exponent and mantissa manipulation - friend const mpreal frexp(const mpreal& v, mp_exp_t* exp); - friend const mpreal ldexp(const mpreal& v, mp_exp_t exp); - // Splits mpreal value into fractional and integer parts. // Returns fractional part and stores integer part in n. - friend const mpreal modf(const mpreal& v, mpreal& n); + friend const mpreal modf(const mpreal& v, mpreal& n); // Constants // don't forget to call mpfr_free_cache() for every thread where you are using const-functions @@ -515,14 +470,14 @@ public: friend const mpreal frac (const mpreal& v, mp_rnd_t rnd_mode); friend const mpreal remainder ( const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode); friend const mpreal remquo (long* q, const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode); - + // Miscellaneous Functions friend const mpreal nexttoward (const mpreal& x, const mpreal& y); friend const mpreal nextabove (const mpreal& x); friend const mpreal nextbelow (const mpreal& x); // use gmp_randinit_default() to init state, gmp_randclear() to clear - friend const mpreal urandomb (gmp_randstate_t& state); + friend const mpreal urandomb (gmp_randstate_t& state); // MPFR < 2.4.2 Specifics #if (MPFR_VERSION <= MPFR_VERSION_NUM(2,4,2)) @@ -530,9 +485,9 @@ public: #endif // Instance Checkers - friend bool (isnan) (const mpreal& v); - friend bool (isinf) (const mpreal& v); - friend bool (isfinite) (const mpreal& v); + friend bool isnan (const mpreal& v); + friend bool isinf (const mpreal& v); + friend bool isfinite (const mpreal& v); friend bool isnum (const mpreal& v); friend bool iszero (const mpreal& v); @@ -549,9 +504,9 @@ public: // Aliases for get_prec(), set_prec() - needed for compatibility with std::complex interface inline mpreal& setPrecision(int Precision, mp_rnd_t RoundingMode = get_default_rnd()); inline int getPrecision() const; - 
+ // Set mpreal to +/- inf, NaN, +/-0 - mpreal& setInf (int Sign = +1); + mpreal& setInf (int Sign = +1); mpreal& setNan (); mpreal& setZero (int Sign = +1); mpreal& setSign (int Sign, mp_rnd_t RoundingMode = get_default_rnd()); @@ -560,7 +515,7 @@ public: mp_exp_t get_exp(); int set_exp(mp_exp_t e); int check_range (int t, mp_rnd_t rnd_mode = get_default_rnd()); - int subnormalize (int t,mp_rnd_t rnd_mode = get_default_rnd()); + int subnormalize (int t, mp_rnd_t rnd_mode = get_default_rnd()); // Inexact conversion from float inline bool fits_in_bits(double x, int n); @@ -580,7 +535,7 @@ public: // Efficient swapping of two mpreal values - needed for std algorithms friend void swap(mpreal& x, mpreal& y); - + friend const mpreal fmax(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode); friend const mpreal fmin(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode); @@ -590,7 +545,7 @@ private: // // mpfr::mpreal= ; Show value only // mpfr::mpreal=, bits ; Show value & precision - // + // // at the beginning of // [Visual Studio Installation Folder]\Common7\Packages\Debugger\autoexp.dat MPREAL_MSVC_DEBUGVIEW_DATA @@ -609,15 +564,15 @@ public: ////////////////////////////////////////////////////////////////////////// // Constructors & converters // Default constructor: creates mp number and initializes it to 0. 
-inline mpreal::mpreal() -{ - mpfr_init2 (mpfr_ptr(), mpreal::get_default_prec()); - mpfr_set_ui(mpfr_ptr(), 0, mpreal::get_default_rnd()); +inline mpreal::mpreal() +{ + mpfr_init2(mpfr_ptr(), mpreal::get_default_prec()); + mpfr_set_zero_fast(mpfr_ptr()); MPREAL_MSVC_DEBUGVIEW_CODE; } -inline mpreal::mpreal(const mpreal& u) +inline mpreal::mpreal(const mpreal& u) { mpfr_init2(mpfr_ptr(),mpfr_get_prec(u.mpfr_srcptr())); mpfr_set (mpfr_ptr(),u.mpfr_srcptr(),mpreal::get_default_rnd()); @@ -628,7 +583,7 @@ inline mpreal::mpreal(const mpreal& u) #ifdef MPREAL_HAVE_MOVE_SUPPORT inline mpreal::mpreal(mpreal&& other) { - mpfr_set_uninitialized(mpfr_ptr()); // make sure "other" holds no pinter to actual data + mpfr_set_uninitialized(mpfr_ptr()); // make sure "other" holds no pointer to actual data mpfr_swap(mpfr_ptr(), other.mpfr_ptr()); MPREAL_MSVC_DEBUGVIEW_CODE; @@ -700,15 +655,31 @@ inline mpreal::mpreal(const double u, mp_prec_t prec, mp_rnd_t mode) } inline mpreal::mpreal(const long double u, mp_prec_t prec, mp_rnd_t mode) -{ +{ mpfr_init2 (mpfr_ptr(), prec); mpfr_set_ld(mpfr_ptr(), u, mode); MPREAL_MSVC_DEBUGVIEW_CODE; } +inline mpreal::mpreal(const unsigned long long int u, mp_prec_t prec, mp_rnd_t mode) +{ + mpfr_init2 (mpfr_ptr(), prec); + mpfr_set_uj(mpfr_ptr(), u, mode); + + MPREAL_MSVC_DEBUGVIEW_CODE; +} + +inline mpreal::mpreal(const long long int u, mp_prec_t prec, mp_rnd_t mode) +{ + mpfr_init2 (mpfr_ptr(), prec); + mpfr_set_sj(mpfr_ptr(), u, mode); + + MPREAL_MSVC_DEBUGVIEW_CODE; +} + inline mpreal::mpreal(const unsigned long int u, mp_prec_t prec, mp_rnd_t mode) -{ +{ mpfr_init2 (mpfr_ptr(), prec); mpfr_set_ui(mpfr_ptr(), u, mode); @@ -716,7 +687,7 @@ inline mpreal::mpreal(const unsigned long int u, mp_prec_t prec, mp_rnd_t mode) } inline mpreal::mpreal(const unsigned int u, mp_prec_t prec, mp_rnd_t mode) -{ +{ mpfr_init2 (mpfr_ptr(), prec); mpfr_set_ui(mpfr_ptr(), u, mode); @@ -724,7 +695,7 @@ inline mpreal::mpreal(const unsigned int u, mp_prec_t prec, 
mp_rnd_t mode) } inline mpreal::mpreal(const long int u, mp_prec_t prec, mp_rnd_t mode) -{ +{ mpfr_init2 (mpfr_ptr(), prec); mpfr_set_si(mpfr_ptr(), u, mode); @@ -732,35 +703,17 @@ inline mpreal::mpreal(const long int u, mp_prec_t prec, mp_rnd_t mode) } inline mpreal::mpreal(const int u, mp_prec_t prec, mp_rnd_t mode) -{ +{ mpfr_init2 (mpfr_ptr(), prec); mpfr_set_si(mpfr_ptr(), u, mode); MPREAL_MSVC_DEBUGVIEW_CODE; } -#if defined (MPREAL_HAVE_INT64_SUPPORT) -inline mpreal::mpreal(const uint64_t u, mp_prec_t prec, mp_rnd_t mode) -{ - mpfr_init2 (mpfr_ptr(), prec); - mpfr_set_uj(mpfr_ptr(), u, mode); - - MPREAL_MSVC_DEBUGVIEW_CODE; -} - -inline mpreal::mpreal(const int64_t u, mp_prec_t prec, mp_rnd_t mode) -{ - mpfr_init2 (mpfr_ptr(), prec); - mpfr_set_sj(mpfr_ptr(), u, mode); - - MPREAL_MSVC_DEBUGVIEW_CODE; -} -#endif - inline mpreal::mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode) { mpfr_init2 (mpfr_ptr(), prec); - mpfr_set_str(mpfr_ptr(), s, base, mode); + mpfr_set_str(mpfr_ptr(), s, base, mode); MPREAL_MSVC_DEBUGVIEW_CODE; } @@ -768,7 +721,7 @@ inline mpreal::mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode) inline mpreal::mpreal(const std::string& s, mp_prec_t prec, int base, mp_rnd_t mode) { mpfr_init2 (mpfr_ptr(), prec); - mpfr_set_str(mpfr_ptr(), s.c_str(), base, mode); + mpfr_set_str(mpfr_ptr(), s.c_str(), base, mode); MPREAL_MSVC_DEBUGVIEW_CODE; } @@ -776,15 +729,15 @@ inline mpreal::mpreal(const std::string& s, mp_prec_t prec, int base, mp_rnd_t m inline void mpreal::clear(::mpfr_ptr x) { #ifdef MPREAL_HAVE_MOVE_SUPPORT - if(mpfr_is_initialized(x)) + if(mpfr_is_initialized(x)) #endif mpfr_clear(x); } -inline mpreal::~mpreal() -{ +inline mpreal::~mpreal() +{ clear(mpfr_ptr()); -} +} // internal namespace needed for template magic namespace internal{ @@ -792,58 +745,55 @@ namespace internal{ // Use SFINAE to restrict arithmetic operations instantiation only for numeric types // This is needed for smooth integration with libraries 
based on expression templates, like Eigen. // TODO: Do the same for boolean operators. - template struct result_type {}; - - template <> struct result_type {typedef mpreal type;}; - template <> struct result_type {typedef mpreal type;}; - template <> struct result_type {typedef mpreal type;}; - template <> struct result_type {typedef mpreal type;}; - template <> struct result_type {typedef mpreal type;}; - template <> struct result_type {typedef mpreal type;}; - template <> struct result_type {typedef mpreal type;}; - template <> struct result_type {typedef mpreal type;}; - template <> struct result_type {typedef mpreal type;}; + template struct result_type {}; -#if defined (MPREAL_HAVE_INT64_SUPPORT) - template <> struct result_type {typedef mpreal type;}; - template <> struct result_type {typedef mpreal type;}; -#endif + template <> struct result_type {typedef mpreal type;}; + template <> struct result_type {typedef mpreal type;}; + template <> struct result_type {typedef mpreal type;}; + template <> struct result_type {typedef mpreal type;}; + template <> struct result_type {typedef mpreal type;}; + template <> struct result_type {typedef mpreal type;}; + template <> struct result_type {typedef mpreal type;}; + template <> struct result_type {typedef mpreal type;}; + template <> struct result_type {typedef mpreal type;}; + template <> struct result_type {typedef mpreal type;}; + template <> struct result_type {typedef mpreal type;}; } // + Addition -template -inline const typename internal::result_type::type +template +inline const typename internal::result_type::type operator+(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) += rhs; } -template -inline const typename internal::result_type::type - operator+(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) += lhs; } +template +inline const typename internal::result_type::type + operator+(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) += lhs; } // - Subtraction -template -inline const typename 
internal::result_type::type +template +inline const typename internal::result_type::type operator-(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) -= rhs; } -template -inline const typename internal::result_type::type +template +inline const typename internal::result_type::type operator-(const Lhs& lhs, const mpreal& rhs){ return mpreal(lhs) -= rhs; } // * Multiplication -template -inline const typename internal::result_type::type +template +inline const typename internal::result_type::type operator*(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) *= rhs; } -template -inline const typename internal::result_type::type - operator*(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) *= lhs; } +template +inline const typename internal::result_type::type + operator*(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) *= lhs; } // / Division -template -inline const typename internal::result_type::type +template +inline const typename internal::result_type::type operator/(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) /= rhs; } -template -inline const typename internal::result_type::type +template +inline const typename internal::result_type::type operator/(const Lhs& lhs, const mpreal& rhs){ return mpreal(lhs) /= rhs; } ////////////////////////////////////////////////////////////////////////// @@ -893,17 +843,17 @@ const mpreal pow(const long int a, const double b, mp_rnd_t rnd_mode = mpreal::g const mpreal pow(const int a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); -const mpreal pow(const int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); +const mpreal pow(const int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const int a, const long double b, mp_rnd_t rnd_mode = 
mpreal::get_default_rnd()); -const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); +const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); -const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); +const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const long double a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const long double a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const long double a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const long double a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); -const mpreal pow(const double a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); +const mpreal pow(const double a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const double a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const double a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); const mpreal pow(const double a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()); @@ -920,9 +870,9 @@ inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode = mpr inline mpreal machine_epsilon(mp_prec_t prec = mpreal::get_default_prec()); // Returns smallest eps such that x + eps != x (relative machine epsilon) -inline mpreal machine_epsilon(const mpreal& x); +inline mpreal machine_epsilon(const mpreal& x); -// Gives max & min values for the required precision, +// Gives max & min values for the required precision, // minval is 'safe' meaning 1 / minval does not overflow // maxval is 'safe' meaning 1 / maxval does not underflow inline mpreal minval(mp_prec_t prec = 
mpreal::get_default_prec()); @@ -935,13 +885,13 @@ inline bool isEqualFuzzy(const mpreal& a, const mpreal& b, const mpreal& eps); inline bool isEqualFuzzy(const mpreal& a, const mpreal& b); // 'Bitwise' equality check -// maxUlps - a and b can be apart by maxUlps binary numbers. +// maxUlps - a and b can be apart by maxUlps binary numbers. inline bool isEqualUlps(const mpreal& a, const mpreal& b, int maxUlps); ////////////////////////////////////////////////////////////////////////// -// Convert precision in 'bits' to decimal digits and vice versa. -// bits = ceil(digits*log[2](10)) -// digits = floor(bits*log[10](2)) +// Convert precision in 'bits' to decimal digits and vice versa. +// bits = ceil(digits*log[2](10)) +// digits = floor(bits*log[10](2)) inline mp_prec_t digits2bits(int d); inline int bits2digits(mp_prec_t b); @@ -979,7 +929,7 @@ inline mpreal& mpreal::operator=(const mpreal& v) inline mpreal& mpreal::operator=(const mpf_t v) { mpfr_set_f(mpfr_ptr(), v, mpreal::get_default_rnd()); - + MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } @@ -987,7 +937,7 @@ inline mpreal& mpreal::operator=(const mpf_t v) inline mpreal& mpreal::operator=(const mpz_t v) { mpfr_set_z(mpfr_ptr(), v, mpreal::get_default_rnd()); - + MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } @@ -1000,16 +950,16 @@ inline mpreal& mpreal::operator=(const mpq_t v) return *this; } -inline mpreal& mpreal::operator=(const long double v) -{ +inline mpreal& mpreal::operator=(const long double v) +{ mpfr_set_ld(mpfr_ptr(), v, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator=(const double v) -{ +inline mpreal& mpreal::operator=(const double v) +{ #if (MPREAL_DOUBLE_BITS_OVERFLOW > -1) if(fits_in_bits(v, MPREAL_DOUBLE_BITS_OVERFLOW)) { @@ -1024,33 +974,49 @@ inline mpreal& mpreal::operator=(const double v) return *this; } -inline mpreal& mpreal::operator=(const unsigned long int v) -{ - mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd()); +inline 
mpreal& mpreal::operator=(const unsigned long int v) +{ + mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator=(const unsigned int v) -{ - mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd()); +inline mpreal& mpreal::operator=(const unsigned int v) +{ + mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator=(const long int v) -{ - mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd()); +inline mpreal& mpreal::operator=(const unsigned long long int v) +{ + mpfr_set_uj(mpfr_ptr(), v, mpreal::get_default_rnd()); + + MPREAL_MSVC_DEBUGVIEW_CODE; + return *this; +} + +inline mpreal& mpreal::operator=(const long long int v) +{ + mpfr_set_sj(mpfr_ptr(), v, mpreal::get_default_rnd()); + + MPREAL_MSVC_DEBUGVIEW_CODE; + return *this; +} + +inline mpreal& mpreal::operator=(const long int v) +{ + mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } inline mpreal& mpreal::operator=(const int v) -{ - mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd()); +{ + mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; @@ -1071,7 +1037,7 @@ inline mpreal& mpreal::operator=(const char* s) if(0 == mpfr_set_str(t, s, 10, mpreal::get_default_rnd())) { - mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd()); + mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; } @@ -1094,7 +1060,7 @@ inline mpreal& mpreal::operator=(const std::string& s) if(0 == mpfr_set_str(t, s.c_str(), 10, mpreal::get_default_rnd())) { - mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd()); + mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd()); MPREAL_MSVC_DEBUGVIEW_CODE; } @@ -1102,6 +1068,11 @@ inline mpreal& mpreal::operator=(const std::string& s) return *this; } +template +inline mpreal& mpreal::operator= (const std::complex& z) +{ + return *this = 
z.real(); +} ////////////////////////////////////////////////////////////////////////// // + Addition @@ -1135,9 +1106,9 @@ inline mpreal& mpreal::operator+=(const mpq_t u) inline mpreal& mpreal::operator+= (const long double u) { - *this += mpreal(u); + *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; - return *this; + return *this; } inline mpreal& mpreal::operator+= (const double u) @@ -1180,16 +1151,14 @@ inline mpreal& mpreal::operator+=(const int u) return *this; } -#if defined (MPREAL_HAVE_INT64_SUPPORT) -inline mpreal& mpreal::operator+=(const int64_t u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator+=(const uint64_t u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator-=(const int64_t u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator-=(const uint64_t u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator*=(const int64_t u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator*=(const uint64_t u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator/=(const int64_t u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::operator/=(const uint64_t u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -#endif +inline mpreal& mpreal::operator+=(const long long int u) { *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator+=(const unsigned long long int u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator-=(const long long int u) { *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator-=(const unsigned long long int u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& 
mpreal::operator*=(const long long int u) { *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator*=(const unsigned long long int u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator/=(const long long int u) { *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } +inline mpreal& mpreal::operator/=(const unsigned long long int u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } inline const mpreal mpreal::operator+()const { return mpreal(*this); } @@ -1200,7 +1169,7 @@ inline const mpreal operator+(const mpreal& a, const mpreal& b) return c; } -inline mpreal& mpreal::operator++() +inline mpreal& mpreal::operator++() { return *this += 1; } @@ -1212,7 +1181,7 @@ inline const mpreal mpreal::operator++ (int) return x; } -inline mpreal& mpreal::operator--() +inline mpreal& mpreal::operator--() { return *this -= 1; } @@ -1249,9 +1218,9 @@ inline mpreal& mpreal::operator-=(const mpq_t v) inline mpreal& mpreal::operator-=(const long double v) { - *this -= mpreal(v); + *this -= mpreal(v); MPREAL_MSVC_DEBUGVIEW_CODE; - return *this; + return *this; } inline mpreal& mpreal::operator-=(const double v) @@ -1259,7 +1228,7 @@ inline mpreal& mpreal::operator-=(const double v) #if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0)) mpfr_sub_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd()); #else - *this -= mpreal(v); + *this -= mpreal(v); #endif MPREAL_MSVC_DEBUGVIEW_CODE; @@ -1374,9 +1343,9 @@ inline mpreal& mpreal::operator*=(const mpq_t v) inline mpreal& mpreal::operator*=(const long double v) { - *this *= mpreal(v); + *this *= mpreal(v); MPREAL_MSVC_DEBUGVIEW_CODE; - return *this; + return *this; } inline mpreal& mpreal::operator*=(const double v) @@ -1384,7 +1353,7 @@ inline mpreal& mpreal::operator*=(const double v) #if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0)) mpfr_mul_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd()); #else - *this *= mpreal(v); + *this *= 
mpreal(v); #endif MPREAL_MSVC_DEBUGVIEW_CODE; return *this; @@ -1452,7 +1421,7 @@ inline mpreal& mpreal::operator/=(const long double v) { *this /= mpreal(v); MPREAL_MSVC_DEBUGVIEW_CODE; - return *this; + return *this; } inline mpreal& mpreal::operator/=(const double v) @@ -1460,7 +1429,7 @@ inline mpreal& mpreal::operator/=(const double v) #if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0)) mpfr_div_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd()); #else - *this /= mpreal(v); + *this /= mpreal(v); #endif MPREAL_MSVC_DEBUGVIEW_CODE; return *this; @@ -1671,45 +1640,86 @@ inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode) } ////////////////////////////////////////////////////////////////////////// -//Boolean operators -inline bool operator > (const mpreal& a, const mpreal& b){ return (mpfr_greater_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } -inline bool operator >= (const mpreal& a, const mpreal& b){ return (mpfr_greaterequal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } -inline bool operator < (const mpreal& a, const mpreal& b){ return (mpfr_less_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } -inline bool operator <= (const mpreal& a, const mpreal& b){ return (mpfr_lessequal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } -inline bool operator == (const mpreal& a, const mpreal& b){ return (mpfr_equal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } -inline bool operator != (const mpreal& a, const mpreal& b){ return (mpfr_lessgreater_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); } +//Relational operators -inline bool operator == (const mpreal& a, const unsigned long int b ){ return (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const unsigned int b ){ return (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const long int b ){ return (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const int b ){ return (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } 
-inline bool operator == (const mpreal& a, const long double b ){ return (mpfr_cmp_ld(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const double b ){ return (mpfr_cmp_d (a.mpfr_srcptr(),b) == 0 ); } +// WARNING: +// +// Please note that following checks for double-NaN are guaranteed to work only in IEEE math mode: +// +// isnan(b) = (b != b) +// isnan(b) = !(b == b) (we use in code below) +// +// Be cautions if you use compiler options which break strict IEEE compliance (e.g. -ffast-math in GCC). +// Use std::isnan instead (C++11). +inline bool operator > (const mpreal& a, const mpreal& b ){ return (mpfr_greater_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } +inline bool operator > (const mpreal& a, const unsigned long int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const unsigned int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const long int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const long double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) > 0 ); } -inline bool (isnan) (const mpreal& op){ return (mpfr_nan_p (op.mpfr_srcptr()) != 0 ); } -inline bool (isinf) (const mpreal& op){ return (mpfr_inf_p (op.mpfr_srcptr()) != 0 ); } -inline bool (isfinite) (const mpreal& op){ return (mpfr_number_p (op.mpfr_srcptr()) != 0 ); } +inline bool operator >= (const mpreal& a, const mpreal& b ){ return (mpfr_greaterequal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } +inline bool operator >= (const mpreal& a, const unsigned long int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); 
} +inline bool operator >= (const mpreal& a, const unsigned int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const long int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const long double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) >= 0 ); } + +inline bool operator < (const mpreal& a, const mpreal& b ){ return (mpfr_less_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } +inline bool operator < (const mpreal& a, const unsigned long int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const unsigned int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const long int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const long double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) < 0 ); } + +inline bool operator <= (const mpreal& a, const mpreal& b ){ return (mpfr_lessequal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } +inline bool operator <= (const mpreal& a, const unsigned long int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const unsigned int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const long 
int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const long double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) <= 0 ); } + +inline bool operator == (const mpreal& a, const mpreal& b ){ return (mpfr_equal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } +inline bool operator == (const mpreal& a, const unsigned long int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const unsigned int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const long int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const long double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) == 0 ); } + +inline bool operator != (const mpreal& a, const mpreal& b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const unsigned long int b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const unsigned int b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const long int b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const int b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const long double b ){ return !(a == b); } +inline bool operator != (const mpreal& a, const double b ){ return !(a == b); } + +inline bool isnan (const 
mpreal& op){ return (mpfr_nan_p (op.mpfr_srcptr()) != 0 ); } +inline bool isinf (const mpreal& op){ return (mpfr_inf_p (op.mpfr_srcptr()) != 0 ); } +inline bool isfinite (const mpreal& op){ return (mpfr_number_p (op.mpfr_srcptr()) != 0 ); } inline bool iszero (const mpreal& op){ return (mpfr_zero_p (op.mpfr_srcptr()) != 0 ); } inline bool isint (const mpreal& op){ return (mpfr_integer_p(op.mpfr_srcptr()) != 0 ); } #if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) inline bool isregular(const mpreal& op){ return (mpfr_regular_p(op.mpfr_srcptr()));} -#endif +#endif ////////////////////////////////////////////////////////////////////////// // Type Converters -inline bool mpreal::toBool (mp_rnd_t /*mode*/) const { return mpfr_zero_p (mpfr_srcptr()) == 0; } -inline long mpreal::toLong (mp_rnd_t mode) const { return mpfr_get_si (mpfr_srcptr(), mode); } -inline unsigned long mpreal::toULong (mp_rnd_t mode) const { return mpfr_get_ui (mpfr_srcptr(), mode); } -inline float mpreal::toFloat (mp_rnd_t mode) const { return mpfr_get_flt(mpfr_srcptr(), mode); } -inline double mpreal::toDouble (mp_rnd_t mode) const { return mpfr_get_d (mpfr_srcptr(), mode); } -inline long double mpreal::toLDouble(mp_rnd_t mode) const { return mpfr_get_ld (mpfr_srcptr(), mode); } - -#if defined (MPREAL_HAVE_INT64_SUPPORT) -inline int64_t mpreal::toInt64 (mp_rnd_t mode) const{ return mpfr_get_sj(mpfr_srcptr(), mode); } -inline uint64_t mpreal::toUInt64(mp_rnd_t mode) const{ return mpfr_get_uj(mpfr_srcptr(), mode); } -#endif +inline bool mpreal::toBool ( ) const { return mpfr_zero_p (mpfr_srcptr()) == 0; } +inline long mpreal::toLong (mp_rnd_t mode) const { return mpfr_get_si (mpfr_srcptr(), mode); } +inline unsigned long mpreal::toULong (mp_rnd_t mode) const { return mpfr_get_ui (mpfr_srcptr(), mode); } +inline float mpreal::toFloat (mp_rnd_t mode) const { return mpfr_get_flt(mpfr_srcptr(), mode); } +inline double mpreal::toDouble (mp_rnd_t mode) const { return mpfr_get_d (mpfr_srcptr(), mode); } +inline 
long double mpreal::toLDouble(mp_rnd_t mode) const { return mpfr_get_ld (mpfr_srcptr(), mode); } +inline long long mpreal::toLLong (mp_rnd_t mode) const { return mpfr_get_sj (mpfr_srcptr(), mode); } +inline unsigned long long mpreal::toULLong (mp_rnd_t mode) const { return mpfr_get_uj (mpfr_srcptr(), mode); } inline ::mpfr_ptr mpreal::mpfr_ptr() { return mp; } inline ::mpfr_srcptr mpreal::mpfr_ptr() const { return mp; } @@ -1755,21 +1765,21 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const std::ostringstream format; - int digits = (n >= 0) ? n : bits2digits(mpfr_get_prec(mpfr_srcptr())); - + int digits = (n >= 0) ? n : 1 + bits2digits(mpfr_get_prec(mpfr_srcptr())); + format << "%." << digits << "RNg"; return toString(format.str()); #else - char *s, *ns = NULL; + char *s, *ns = NULL; size_t slen, nslen; mp_exp_t exp; std::string out; if(mpfr_inf_p(mp)) - { + { if(mpfr_sgn(mp)>0) return "+Inf"; else return "-Inf"; } @@ -1784,7 +1794,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const { slen = strlen(s); nslen = strlen(ns); - if(nslen<=slen) + if(nslen<=slen) { mpfr_free_str(s); s = ns; @@ -1801,7 +1811,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const { // Remove zeros starting from right end char* ptr = s+slen-1; - while (*ptr=='0' && ptr>s+exp) ptr--; + while (*ptr=='0' && ptr>s+exp) ptr--; if(ptr==s+exp) out = std::string(s,exp+1); else out = std::string(s,exp+1)+'.'+std::string(s+exp+1,ptr-(s+exp+1)+1); @@ -1812,7 +1822,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const { // Remove zeros starting from right end char* ptr = s+slen-1; - while (*ptr=='0' && ptr>s+exp-1) ptr--; + while (*ptr=='0' && ptr>s+exp-1) ptr--; if(ptr==s+exp-1) out = std::string(s,exp); else out = std::string(s,exp)+'.'+std::string(s+exp,ptr-(s+exp)+1); @@ -1825,7 +1835,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const { // Remove zeros starting from right end char* ptr 
= s+slen-1; - while (*ptr=='0' && ptr>s+1) ptr--; + while (*ptr=='0' && ptr>s+1) ptr--; if(ptr==s+1) out = std::string(s,2); else out = std::string(s,2)+'.'+std::string(s+2,ptr-(s+2)+1); @@ -1836,7 +1846,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const { // Remove zeros starting from right end char* ptr = s+slen-1; - while (*ptr=='0' && ptr>s) ptr--; + while (*ptr=='0' && ptr>s) ptr--; if(ptr==s) out = std::string(s,1); else out = std::string(s,1)+'.'+std::string(s+1,ptr-(s+1)+1); @@ -1863,7 +1873,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const ////////////////////////////////////////////////////////////////////////// // I/O -inline std::ostream& mpreal::output(std::ostream& os) const +inline std::ostream& mpreal::output(std::ostream& os) const { std::ostringstream format; const std::ios::fmtflags flags = os.flags(); @@ -1926,8 +1936,7 @@ inline int bits2digits(mp_prec_t b) // Set/Get number properties inline int sgn(const mpreal& op) { - int r = mpfr_signbit(op.mpfr_srcptr()); - return (r > 0? 
-1 : 1); + return mpfr_sgn(op.mpfr_srcptr()); } inline mpreal& mpreal::setSign(int sign, mp_rnd_t RoundingMode) @@ -1949,29 +1958,28 @@ inline mpreal& mpreal::setPrecision(int Precision, mp_rnd_t RoundingMode) return *this; } -inline mpreal& mpreal::setInf(int sign) -{ +inline mpreal& mpreal::setInf(int sign) +{ mpfr_set_inf(mpfr_ptr(), sign); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; -} +} -inline mpreal& mpreal::setNan() +inline mpreal& mpreal::setNan() { mpfr_set_nan(mpfr_ptr()); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; } -inline mpreal& mpreal::setZero(int sign) +inline mpreal& mpreal::setZero(int sign) { - #if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) mpfr_set_zero(mpfr_ptr(), sign); #else mpfr_set_si(mpfr_ptr(), 0, (mpfr_get_default_rounding_mode)()); setSign(sign); -#endif +#endif MPREAL_MSVC_DEBUGVIEW_CODE; return *this; @@ -2000,23 +2008,32 @@ inline int mpreal::set_exp (mp_exp_t e) return x; } -inline const mpreal frexp(const mpreal& v, mp_exp_t* exp) +inline const mpreal frexp(const mpreal& x, mp_exp_t* exp, mp_rnd_t mode = mpreal::get_default_rnd()) { - mpreal x(v); - *exp = x.get_exp(); - x.set_exp(0); - return x; + mpreal y(x); +#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0)) + mpfr_frexp(exp,y.mpfr_ptr(),x.mpfr_srcptr(),mode); +#else + *exp = mpfr_get_exp(y.mpfr_srcptr()); + mpfr_set_exp(y.mpfr_ptr(),0); +#endif + return y; } inline const mpreal ldexp(const mpreal& v, mp_exp_t exp) { mpreal x(v); - // rounding is not important since we just increasing the exponent - mpfr_mul_2si(x.mpfr_ptr(), x.mpfr_srcptr(), exp, mpreal::get_default_rnd()); + // rounding is not important since we are just increasing the exponent (= exact operation) + mpfr_mul_2si(x.mpfr_ptr(), x.mpfr_srcptr(), exp, mpreal::get_default_rnd()); return x; } +inline const mpreal scalbn(const mpreal& v, mp_exp_t exp) +{ + return ldexp(v, exp); +} + inline mpreal machine_epsilon(mp_prec_t prec) { /* the smallest eps such that 1 + eps != 1 */ @@ -2024,7 +2041,7 @@ inline mpreal 
machine_epsilon(mp_prec_t prec) } inline mpreal machine_epsilon(const mpreal& x) -{ +{ /* the smallest eps such that x + eps != x */ if( x < 0) { @@ -2045,7 +2062,7 @@ inline mpreal minval(mp_prec_t prec) inline mpreal maxval(mp_prec_t prec) { /* max = (1 - eps) * 2^emax, eps is machine epsilon */ - return (mpreal(1, prec) - machine_epsilon(prec)) << mpreal::get_emax(); + return (mpreal(1, prec) - machine_epsilon(prec)) << mpreal::get_emax(); } inline bool isEqualUlps(const mpreal& a, const mpreal& b, int maxUlps) @@ -2063,12 +2080,26 @@ inline bool isEqualFuzzy(const mpreal& a, const mpreal& b) return isEqualFuzzy(a, b, machine_epsilon((max)(1, (min)(abs(a), abs(b))))); } +////////////////////////////////////////////////////////////////////////// +// C++11 sign functions. +inline mpreal copysign(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) +{ + mpreal rop(0, mpfr_get_prec(x.mpfr_ptr())); + mpfr_setsign(rop.mpfr_ptr(), x.mpfr_srcptr(), mpfr_signbit(y.mpfr_srcptr()), rnd_mode); + return rop; +} + +inline bool signbit(const mpreal& x) +{ + return mpfr_signbit(x.mpfr_srcptr()); +} + inline const mpreal modf(const mpreal& v, mpreal& n) { mpreal f(v); // rounding is not important since we are using the same number - mpfr_frac (f.mpfr_ptr(),f.mpfr_srcptr(),mpreal::get_default_rnd()); + mpfr_frac (f.mpfr_ptr(),f.mpfr_srcptr(),mpreal::get_default_rnd()); mpfr_trunc(n.mpfr_ptr(),v.mpfr_srcptr()); return f; } @@ -2131,7 +2162,7 @@ inline mp_exp_t mpreal::get_emax_max (void) #define MPREAL_UNARY_MATH_FUNCTION_BODY(f) \ mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); \ mpfr_##f(y.mpfr_ptr(), x.mpfr_srcptr(), r); \ - return y; + return y; inline const mpreal sqr (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(sqr ); } @@ -2154,7 +2185,7 @@ inline const mpreal sqrt(const unsigned int v, mp_rnd_t rnd_mode) inline const mpreal sqrt(const long int v, mp_rnd_t rnd_mode) { if (v>=0) return 
sqrt(static_cast(v),rnd_mode); - else return mpreal().setNan(); // NaN + else return mpreal().setNan(); // NaN } inline const mpreal sqrt(const int v, mp_rnd_t rnd_mode) @@ -2165,9 +2196,9 @@ inline const mpreal sqrt(const int v, mp_rnd_t rnd_mode) inline const mpreal root(const mpreal& x, unsigned long int k, mp_rnd_t r = mpreal::get_default_rnd()) { - mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); - mpfr_root(y.mpfr_ptr(), x.mpfr_srcptr(), k, r); - return y; + mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); + mpfr_root(y.mpfr_ptr(), x.mpfr_srcptr(), k, r); + return y; } inline const mpreal dim(const mpreal& a, const mpreal& b, mp_rnd_t r = mpreal::get_default_rnd()) @@ -2209,6 +2240,8 @@ inline const mpreal acos (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd inline const mpreal asin (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(asin ); } inline const mpreal atan (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(atan ); } +inline const mpreal logb (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { return log2 (abs(x),r); } + inline const mpreal acot (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return atan (1/v, r); } inline const mpreal asec (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return acos (1/v, r); } inline const mpreal acsc (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return asin (1/v, r); } @@ -2230,6 +2263,7 @@ inline const mpreal log1p (const mpreal& x, mp_rnd_t r = mpreal::get_default_r inline const mpreal expm1 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(expm1 ); } inline const mpreal eint (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(eint ); } inline const mpreal gamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(gamma ); } +inline const mpreal tgamma (const mpreal& 
x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(gamma ); } inline const mpreal lngamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(lngamma); } inline const mpreal zeta (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(zeta ); } inline const mpreal erf (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(erf ); } @@ -2254,7 +2288,7 @@ inline const mpreal hypot (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = } inline const mpreal remainder (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ +{ mpreal a(0,(std::max)(y.getPrecision(), x.getPrecision())); mpfr_remainder(a.mpfr_ptr(), x.mpfr_srcptr(), y.mpfr_srcptr(), rnd_mode); return a; @@ -2307,9 +2341,9 @@ inline const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, m mpreal a; mp_prec_t p1, p2, p3; - p1 = v1.get_prec(); - p2 = v2.get_prec(); - p3 = v3.get_prec(); + p1 = v1.get_prec(); + p2 = v2.get_prec(); + p3 = v3.get_prec(); a.set_prec(p3>p2?(p3>p1?p3:p1):(p2>p1?p2:p1)); @@ -2322,9 +2356,9 @@ inline const mpreal fms (const mpreal& v1, const mpreal& v2, const mpreal& v3, m mpreal a; mp_prec_t p1, p2, p3; - p1 = v1.get_prec(); - p2 = v2.get_prec(); - p3 = v3.get_prec(); + p1 = v1.get_prec(); + p2 = v2.get_prec(); + p3 = v3.get_prec(); a.set_prec(p3>p2?(p3>p1?p3:p1):(p2>p1?p2:p1)); @@ -2337,8 +2371,8 @@ inline const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode = mpreal a; mp_prec_t p1, p2; - p1 = v1.get_prec(); - p2 = v2.get_prec(); + p1 = v1.get_prec(); + p2 = v2.get_prec(); a.set_prec(p1>p2?p1:p2); @@ -2347,16 +2381,17 @@ inline const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode = return a; } -inline const mpreal sum (const mpreal tab[], unsigned long int n, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) +inline const mpreal sum (const mpreal tab[], const 
unsigned long int n, int& status, mp_rnd_t mode = mpreal::get_default_rnd()) { - mpreal x; - mpfr_ptr* t; - unsigned long int i; + mpfr_srcptr *p = new mpfr_srcptr[n]; - t = new mpfr_ptr[n]; - for (i=0;ixp?yp:xp); @@ -2553,33 +2588,24 @@ inline const mpreal nextbelow (const mpreal& x) inline const mpreal urandomb (gmp_randstate_t& state) { mpreal x; - mpfr_urandomb(x.mp,state); + mpfr_urandomb(x.mpfr_ptr(),state); return x; } -#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0)) -// use gmp_randinit_default() to init state, gmp_randclear() to clear +#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) inline const mpreal urandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) { mpreal x; - mpfr_urandom(x.mp,state,rnd_mode); + mpfr_urandom(x.mpfr_ptr(), state, rnd_mode); return x; } - -inline const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x; - mpfr_grandom(x.mp, NULL, state, rnd_mode); - return x; -} - -#endif +#endif #if (MPFR_VERSION <= MPFR_VERSION_NUM(2,4,2)) inline const mpreal random2 (mp_size_t size, mp_exp_t exp) { mpreal x; - mpfr_random2(x.mp,size,exp); + mpfr_random2(x.mpfr_ptr(),size,exp); return x; } #endif @@ -2590,16 +2616,15 @@ inline const mpreal random2 (mp_size_t size, mp_exp_t exp) // seed != 0 inline const mpreal random(unsigned int seed = 0) { - #if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) static gmp_randstate_t state; - static bool isFirstTime = true; + static bool initialize = true; - if(isFirstTime) + if(initialize) { gmp_randinit_default(state); gmp_randseed_ui(state,0); - isFirstTime = false; + initialize = false; } if(seed != 0) gmp_randseed_ui(state,seed); @@ -2612,17 +2637,25 @@ inline const mpreal random(unsigned int seed = 0) } -#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) +#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0)) + +inline const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) +{ + mpreal x; + mpfr_grandom(x.mpfr_ptr(), 
NULL, state, rnd_mode); + return x; +} + inline const mpreal grandom(unsigned int seed = 0) { static gmp_randstate_t state; - static bool isFirstTime = true; + static bool initialize = true; - if(isFirstTime) + if(initialize) { gmp_randinit_default(state); gmp_randseed_ui(state,0); - isFirstTime = false; + initialize = false; } if(seed != 0) gmp_randseed_ui(state,seed); @@ -2634,17 +2667,17 @@ inline const mpreal grandom(unsigned int seed = 0) ////////////////////////////////////////////////////////////////////////// // Set/Get global properties inline void mpreal::set_default_prec(mp_prec_t prec) -{ - mpfr_set_default_prec(prec); +{ + mpfr_set_default_prec(prec); } inline void mpreal::set_default_rnd(mp_rnd_t rnd_mode) -{ - mpfr_set_default_rounding_mode(rnd_mode); +{ + mpfr_set_default_rounding_mode(rnd_mode); } inline bool mpreal::fits_in_bits(double x, int n) -{ +{ int i; double t; return IsInf(x) || (std::modf ( std::ldexp ( std::frexp ( x, &i ), n ), &t ) == 0.0); @@ -2894,7 +2927,7 @@ inline const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode) else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow } -// pow long double +// pow long double inline const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode) { return pow(mpreal(a),mpreal(b),rnd_mode); @@ -2953,9 +2986,9 @@ namespace std { // we are allowed to extend namespace std with specializations only template <> - inline void swap(mpfr::mpreal& x, mpfr::mpreal& y) - { - return mpfr::swap(x, y); + inline void swap(mpfr::mpreal& x, mpfr::mpreal& y) + { + return mpfr::swap(x, y); } template<> @@ -2966,7 +2999,7 @@ namespace std static const bool is_signed = true; static const bool is_integer = false; static const bool is_exact = false; - static const int radix = 2; + static const int radix = 2; static const bool has_infinity = true; static const bool has_quiet_NaN = true; @@ -2986,7 +3019,7 @@ namespace std // Returns smallest eps such that 1 + eps != 1 (classic machine 
epsilon) inline static mpfr::mpreal epsilon(mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return mpfr::machine_epsilon(precision); } - + // Returns smallest eps such that x + eps != x (relative machine epsilon) inline static mpfr::mpreal epsilon(const mpfr::mpreal& x) { return mpfr::machine_epsilon(x); } @@ -2994,8 +3027,8 @@ namespace std { mp_rnd_t r = mpfr::mpreal::get_default_rnd(); - if(r == GMP_RNDN) return mpfr::mpreal(0.5, precision); - else return mpfr::mpreal(1.0, precision); + if(r == GMP_RNDN) return mpfr::mpreal(0.5, precision); + else return mpfr::mpreal(1.0, precision); } inline static const mpfr::mpreal infinity() { return mpfr::const_infinity(); } @@ -3006,17 +3039,17 @@ namespace std // Please note, exponent range is not fixed in MPFR static const int min_exponent = MPFR_EMIN_DEFAULT; static const int max_exponent = MPFR_EMAX_DEFAULT; - MPREAL_PERMISSIVE_EXPR static const int min_exponent10 = (int) (MPFR_EMIN_DEFAULT * 0.3010299956639811); - MPREAL_PERMISSIVE_EXPR static const int max_exponent10 = (int) (MPFR_EMAX_DEFAULT * 0.3010299956639811); + MPREAL_PERMISSIVE_EXPR static const int min_exponent10 = (int) (MPFR_EMIN_DEFAULT * 0.3010299956639811); + MPREAL_PERMISSIVE_EXPR static const int max_exponent10 = (int) (MPFR_EMAX_DEFAULT * 0.3010299956639811); #ifdef MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS // Following members should be constant according to standard, but they can be variable in MPFR - // So we define them as functions here. + // So we define them as functions here. // // This is preferable way for std::numeric_limits specialization. - // But it is incompatible with standard std::numeric_limits and might not work with other libraries, e.g. boost. - // See below for compatible implementation. + // But it is incompatible with standard std::numeric_limits and might not work with other libraries, e.g. boost. + // See below for compatible implementation. 
inline static float_round_style round_style() { mp_rnd_t r = mpfr::mpreal::get_default_rnd(); @@ -3024,9 +3057,9 @@ namespace std switch (r) { case GMP_RNDN: return round_to_nearest; - case GMP_RNDZ: return round_toward_zero; - case GMP_RNDU: return round_toward_infinity; - case GMP_RNDD: return round_toward_neg_infinity; + case GMP_RNDZ: return round_toward_zero; + case GMP_RNDU: return round_toward_infinity; + case GMP_RNDD: return round_toward_neg_infinity; default: return round_indeterminate; } } @@ -3053,13 +3086,13 @@ namespace std // If possible, please use functions digits() and round_style() defined above. // // These (default) values are preserved for compatibility with existing libraries, e.g. boost. - // Change them accordingly to your application. + // Change them accordingly to your application. // // For example, if you use 256 bits of precision uniformly in your program, then: // digits = 256 - // digits10 = 77 + // digits10 = 77 // max_digits10 = 78 - // + // // Approximate formula for decimal digits is: digits10 = floor(log10(2) * digits). See bits2digits() for more details. 
static const std::float_round_style round_style = round_to_nearest; @@ -3071,4 +3104,4 @@ namespace std } -#endif /* __MPREAL_H__ */ +#endif /* __MPREAL_H__ */ \ No newline at end of file From ac22b66f1c724cc3a3289585e3847a743e5fe939 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 13 Oct 2015 10:18:09 +0200 Subject: [PATCH 096/344] Fix macro issues --- unsupported/test/mpreal/mpreal.h | 74 ++++++++++++++++---------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h index f896515aa..7d14c0961 100644 --- a/unsupported/test/mpreal/mpreal.h +++ b/unsupported/test/mpreal/mpreal.h @@ -80,7 +80,7 @@ #define IsInf(x) (!_finite(x)) #else - #define IsInf(x) std::isinf(x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance + #define IsInf(x) std::isinf EIGEN_NOT_A_MACRO (x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance #endif // A Clang feature extension to determine compiler features. @@ -485,9 +485,9 @@ public: #endif // Instance Checkers - friend bool isnan (const mpreal& v); - friend bool isinf (const mpreal& v); - friend bool isfinite (const mpreal& v); + friend bool (isnan) (const mpreal& v); + friend bool (isinf) (const mpreal& v); + friend bool (isfinite) (const mpreal& v); friend bool isnum (const mpreal& v); friend bool iszero (const mpreal& v); @@ -1653,44 +1653,44 @@ inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode) // Use std::isnan instead (C++11). 
inline bool operator > (const mpreal& a, const mpreal& b ){ return (mpfr_greater_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } -inline bool operator > (const mpreal& a, const unsigned long int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); } -inline bool operator > (const mpreal& a, const unsigned int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); } -inline bool operator > (const mpreal& a, const long int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); } -inline bool operator > (const mpreal& a, const int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); } -inline bool operator > (const mpreal& a, const long double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) > 0 ); } -inline bool operator > (const mpreal& a, const double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) > 0 ); } +inline bool operator > (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) > 0 ); } inline bool operator >= (const mpreal& a, const mpreal& b ){ return (mpfr_greaterequal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } -inline bool operator >= (const mpreal& a, const unsigned long int b ){ return 
!isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); } -inline bool operator >= (const mpreal& a, const unsigned int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); } -inline bool operator >= (const mpreal& a, const long int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); } -inline bool operator >= (const mpreal& a, const int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); } -inline bool operator >= (const mpreal& a, const long double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) >= 0 ); } -inline bool operator >= (const mpreal& a, const double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); } +// inline bool operator >= (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (isnan()a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) >= 0 ); } +inline bool operator >= (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) >= 0 ); } inline bool operator < (const mpreal& a, const mpreal& b ){ return (mpfr_less_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } -inline bool operator < (const mpreal& a, const unsigned long int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); } -inline bool operator < (const mpreal& a, const unsigned int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); } 
-inline bool operator < (const mpreal& a, const long int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); } -inline bool operator < (const mpreal& a, const int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); } -inline bool operator < (const mpreal& a, const long double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) < 0 ); } -inline bool operator < (const mpreal& a, const double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) < 0 ); } +inline bool operator < (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) < 0 ); } inline bool operator <= (const mpreal& a, const mpreal& b ){ return (mpfr_lessequal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } -inline bool operator <= (const mpreal& a, const unsigned long int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); } -inline bool operator <= (const mpreal& a, const unsigned int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); } -inline bool operator <= (const mpreal& a, const long int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); } -inline bool operator <= (const mpreal& a, const int b ){ return !isnan(a) && 
(mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); } -inline bool operator <= (const mpreal& a, const long double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) <= 0 ); } -inline bool operator <= (const mpreal& a, const double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) <= 0 ); } +inline bool operator <= (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) <= 0 ); } inline bool operator == (const mpreal& a, const mpreal& b ){ return (mpfr_equal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); } -inline bool operator == (const mpreal& a, const unsigned long int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const unsigned int b ){ return !isnan(a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const long int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const int b ){ return !isnan(a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } -inline bool operator == (const mpreal& a, const long double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) == 0 ); } -inline bool 
operator == (const mpreal& a, const double b ){ return !isnan(a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) == 0 ); } +inline bool operator == (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) == 0 ); } inline bool operator != (const mpreal& a, const mpreal& b ){ return !(a == b); } inline bool operator != (const mpreal& a, const unsigned long int b ){ return !(a == b); } @@ -1700,9 +1700,9 @@ inline bool operator != (const mpreal& a, const int b ){ return ! 
inline bool operator != (const mpreal& a, const long double b ){ return !(a == b); } inline bool operator != (const mpreal& a, const double b ){ return !(a == b); } -inline bool isnan (const mpreal& op){ return (mpfr_nan_p (op.mpfr_srcptr()) != 0 ); } -inline bool isinf (const mpreal& op){ return (mpfr_inf_p (op.mpfr_srcptr()) != 0 ); } -inline bool isfinite (const mpreal& op){ return (mpfr_number_p (op.mpfr_srcptr()) != 0 ); } +inline bool (isnan) (const mpreal& op){ return (mpfr_nan_p (op.mpfr_srcptr()) != 0 ); } +inline bool (isinf) (const mpreal& op){ return (mpfr_inf_p (op.mpfr_srcptr()) != 0 ); } +inline bool (isfinite) (const mpreal& op){ return (mpfr_number_p (op.mpfr_srcptr()) != 0 ); } inline bool iszero (const mpreal& op){ return (mpfr_zero_p (op.mpfr_srcptr()) != 0 ); } inline bool isint (const mpreal& op){ return (mpfr_integer_p(op.mpfr_srcptr()) != 0 ); } From a44d91a0b213f513468c5b09221250513d271205 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 13 Oct 2015 10:53:38 +0200 Subject: [PATCH 097/344] extend unit test for SparseMatrix::prune --- Eigen/src/SparseCore/SparseMatrix.h | 1 - test/sparse_basic.cpp | 21 ++++++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 4cf3145fd..354ed45e8 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -509,7 +509,6 @@ class SparseMatrix void prune(const KeepFunc& keep = KeepFunc()) { // TODO optimize the uncompressed mode to avoid moving and allocating the data twice - // TODO also implement a unit test makeCompressed(); StorageIndex k = 0; diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 492b3a4f2..95bbfab0e 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -219,10 +219,10 @@ template void sparse_basic(const SparseMatrixType& re refM2.setZero(); int countFalseNonZero = 0; int countTrueNonZero = 0; - for (Index j=0; j(0,1); 
if (x<0.1) @@ -232,22 +232,21 @@ template void sparse_basic(const SparseMatrixType& re else if (x<0.5) { countFalseNonZero++; - m2.insertBackByOuterInner(j,i) = Scalar(0); + m2.insert(i,j) = Scalar(0); } else { countTrueNonZero++; - m2.insertBackByOuterInner(j,i) = Scalar(1); - if(SparseMatrixType::IsRowMajor) - refM2(j,i) = Scalar(1); - else - refM2(i,j) = Scalar(1); + m2.insert(i,j) = Scalar(1); + refM2(i,j) = Scalar(1); } } } - m2.finalize(); + if(internal::random()) + m2.makeCompressed(); VERIFY(countFalseNonZero+countTrueNonZero == m2.nonZeros()); - VERIFY_IS_APPROX(m2, refM2); + if(countTrueNonZero>0) + VERIFY_IS_APPROX(m2, refM2); m2.prune(Scalar(1)); VERIFY(countTrueNonZero==m2.nonZeros()); VERIFY_IS_APPROX(m2, refM2); From 9acfc7c4f34ba9f9f8b2d58380732706642dcc25 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 13 Oct 2015 10:55:58 +0200 Subject: [PATCH 098/344] remove reference to internal method --- Eigen/src/SparseCore/SparseMatrix.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 354ed45e8..c8c31fd83 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -532,7 +532,7 @@ class SparseMatrix } /** Resizes the matrix to a \a rows x \a cols matrix leaving old values untouched. - * \sa resizeNonZeros(Index), reserve(), setZero() + * \sa reserve(), setZero() */ void conservativeResize(Index rows, Index cols) { @@ -600,7 +600,7 @@ class SparseMatrix * This function does not free the currently allocated memory. To release as much as memory as possible, * call \code mat.data().squeeze(); \endcode after resizing it. 
* - * \sa resizeNonZeros(Index), reserve(), setZero() + * \sa reserve(), setZero() */ void resize(Index rows, Index cols) { @@ -627,7 +627,6 @@ class SparseMatrix * Resize the nonzero vector to \a size */ void resizeNonZeros(Index size) { - // TODO remove this function m_data.resize(size); } From b9d81c915009e08a2397a2fc2d36a15d16b3b32f Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Tue, 6 Oct 2015 13:29:41 +0200 Subject: [PATCH 099/344] Add a functor to setFromTriplets to handle duplicated entries --- Eigen/src/SparseCore/SparseMatrix.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index c8c31fd83..22a6bd803 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -437,6 +437,10 @@ class SparseMatrix template void setFromTriplets(const InputIterators& begin, const InputIterators& end); + template + void setFromTriplets(const InputIterators& begin, const InputIterators& end); + + template void sumupDuplicates(); //--- @@ -889,7 +893,7 @@ private: namespace internal { -template +template void set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, int Options = 0) { EIGEN_UNUSED_VARIABLE(Options); @@ -915,7 +919,7 @@ void set_from_triplets(const InputIterator& begin, const InputIterator& end, Spa trMat.insertBackUncompressed(it->row(),it->col()) = it->value(); // pass 3: - trMat.sumupDuplicates(); + trMat.template sumupDuplicates(); } // pass 4: transposed copy -> implicit sorting @@ -966,11 +970,24 @@ template template void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) { - internal::set_from_triplets(begin, end, *this); + internal::set_from_triplets, internal::scalar_sum_op >(begin, end, *this); +} + +/** The same as setFromTriplets but when duplicates are met the functor \a DupFunctor is applied: + * \code + * value = 
DupFunctor()(OldValue, NewValue) + * \endcode + */ +template +template +void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) +{ + internal::set_from_triplets, DupFunctor>(begin, end, *this); } /** \internal */ template +template void SparseMatrix::sumupDuplicates() { eigen_assert(!isCompressed()); @@ -989,7 +1006,7 @@ void SparseMatrix::sumupDuplicates() if(wi(i)>=start) { // we already meet this entry => accumulate it - m_data.value(wi(i)) += m_data.value(k); + m_data.value(wi(i)) = DupFunctor()(m_data.value(wi(i)), m_data.value(k)); } else { From 6edae2d30d5a74a3234f6a91adb5ffdb1b86bbfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20N=C3=BCtzi?= Date: Fri, 9 Oct 2015 18:52:48 +0200 Subject: [PATCH 100/344] added CustomIndex capability only to Tensor and not yet to TensorBase. using Sfinae and is_base_of to select correct template which converts to array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit user: Gabriel Nützi branch 'default' added unsupported/Eigen/CXX11/src/Tensor/TensorMetaMacros.h added unsupported/test/cxx11_tensor_customIndex.cpp changed unsupported/Eigen/CXX11/Tensor changed unsupported/Eigen/CXX11/src/Tensor/Tensor.h changed unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h changed unsupported/test/CMakeLists.txt --- unsupported/Eigen/CXX11/Tensor | 1 + unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 81 ++++++++++++++++++- .../Eigen/CXX11/src/Tensor/TensorMeta.h | 51 ++++++++++++ .../Eigen/CXX11/src/Tensor/TensorMetaMacros.h | 33 ++++++++ unsupported/test/CMakeLists.txt | 1 + unsupported/test/cxx11_tensor_customIndex.cpp | 41 ++++++++++ 6 files changed, 205 insertions(+), 3 deletions(-) create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorMetaMacros.h create mode 100644 unsupported/test/cxx11_tensor_customIndex.cpp diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index cbe416602..ee3a1cdb6 100644 --- 
a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -59,6 +59,7 @@ #include "src/Tensor/TensorForwardDeclarations.h" #include "src/Tensor/TensorMeta.h" +#include "src/Tensor/TensorMetaMacros.h" #include "src/Tensor/TensorDeviceType.h" #include "src/Tensor/TensorIndexList.h" #include "src/Tensor/TensorDimensionList.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 6c16e0faa..f9d367e0e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -88,6 +88,11 @@ class Tensor : public TensorBase m_storage; + template + struct isOfNormalIndex{ + static const bool value = internal::is_base_of< array, CustomIndex >::value; + }; + public: // Metadata EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } @@ -111,14 +116,29 @@ class Tensor : public TensorBase{{firstIndex, secondIndex, otherIndices...}}); } + + #endif + + /** Normal Index */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array& indices) const { eigen_internal_assert(checkIndexRange(indices)); return m_storage.data()[linearizedIndex(indices)]; } + /** Custom Index */ + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const CustomIndex & indices) const + { + return coeff(internal::customIndex2Array(indices)); + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const { eigen_internal_assert(index >= 0 && index < size()); @@ -135,12 +155,23 @@ class Tensor : public TensorBase& indices) { eigen_internal_assert(checkIndexRange(indices)); return m_storage.data()[linearizedIndex(indices)]; } + /** Custom Index */ + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const CustomIndex & indices) + { + return coeffRef(internal::customIndex2Array(indices)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { 
eigen_internal_assert(index >= 0 && index < size()); @@ -178,9 +209,20 @@ class Tensor : public TensorBase::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const CustomIndex & indices) const + { + //eigen_assert(checkIndexRange(indices)); /* already in coeff */ + return coeff(internal::customIndex2Array(indices)); + } + + /** Normal Index */ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const { - eigen_assert(checkIndexRange(indices)); + //eigen_assert(checkIndexRange(indices)); /* already in coeff */ return coeff(indices); } @@ -228,12 +270,23 @@ class Tensor : public TensorBase& indices) { - eigen_assert(checkIndexRange(indices)); + //eigen_assert(checkIndexRange(indices)); /* already in coeff */ return coeffRef(indices); } + /** Custom Index */ + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const CustomIndex & indices) + { + //eigen_assert(checkIndexRange(indices)); /* already in coeff */ + return coeffRef(internal::customIndex2Array(indices)); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) { eigen_assert(index >= 0 && index < size()); @@ -295,12 +348,20 @@ class Tensor : public TensorBase& dimensions) : m_storage(internal::array_prod(dimensions), dimensions) { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } + /** Custom Dimension (delegating constructor c++11) */ + template::value) ) + > + inline explicit Tensor(const CustomDimension & dimensions) : Tensor(internal::customIndex2Array(dimensions)) + {} + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(const TensorBase& other) @@ -341,7 +402,7 @@ class Tensor : public TensorBase EIGEN_DEVICE_FUNC + template EIGEN_DEVICE_FUNC void resize(Index firstDimension, IndexTypes... otherDimensions) { // The number of dimensions used to resize a tensor must be equal to the rank of the tensor. 
@@ -350,6 +411,7 @@ class Tensor : public TensorBase& dimensions) { std::size_t i; @@ -367,6 +429,8 @@ class Tensor : public TensorBase& dimensions) { array dims; for (std::size_t i = 0; i < NumIndices; ++i) { @@ -375,6 +439,17 @@ class Tensor : public TensorBase::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(const CustomDimension & dimensions) + { + //eigen_assert(checkIndexRange(indices)); /* already in coeff */ + return coeffRef(internal::customIndex2Array(dimensions)); + } + + #ifndef EIGEN_EMULATE_CXX11_META_H template EIGEN_DEVICE_FUNC diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 7dfa04760..60a6c1469 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -34,6 +34,9 @@ template <> struct max_n_1<0> { + + + #if defined(EIGEN_HAS_CONSTEXPR) #define EIGEN_CONSTEXPR constexpr #else @@ -83,6 +86,54 @@ bool operator!=(const Tuple& x, const Tuple& y) { return !(x == y); } + + + +namespace internal{ + + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndex2Array(const IndexType & idx, numeric_list) { + return { idx(Is)... 
}; + } + + /** Make an array (for index/dimensions) out of a custom index */ + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndex2Array(const IndexType & idx) { + return customIndex2Array(idx, typename gen_numeric_list::type{}); + } + + + template + struct is_base_of + { + + typedef char (&yes)[1]; + typedef char (&no)[2]; + + template + struct Host + { + operator BB*() const; + operator DD*(); + }; + + template + static yes check(D*, T); + static no check(B*, int); + + static const bool value = sizeof(check(Host(), int())) == sizeof(yes); + }; + +} + + + + + + + #undef EIGEN_CONSTEXPR } // namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMetaMacros.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMetaMacros.h new file mode 100644 index 000000000..8cb46e703 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMetaMacros.h @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H +#define EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H + + +/** use this macro in sfinae selection in templated functions + * + * template::value , int >::type = 0 + * > + * void foo(){} + * + * becomes => + * + * template::value ) + * > + * void foo(){} + */ + +#define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \ + typename internal::enable_if< ( __condition__ ) , int >::type = 0 + + +#endif diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 7a1737edd..e9656f404 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -142,6 +142,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_io "-std=c++0x") ei_add_test(cxx11_tensor_generator "-std=c++0x") ei_add_test(cxx11_tensor_custom_op "-std=c++0x") + ei_add_test(cxx11_tensor_customIndex "-std=c++0x") # These tests needs nvcc # ei_add_test(cxx11_tensor_device "-std=c++0x") diff --git a/unsupported/test/cxx11_tensor_customIndex.cpp b/unsupported/test/cxx11_tensor_customIndex.cpp new file mode 100644 index 000000000..6ec809890 --- /dev/null +++ b/unsupported/test/cxx11_tensor_customIndex.cpp @@ -0,0 +1,41 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include + +#include +#include + +using Eigen::Tensor; + + +template +static void test_customIndex() { + + Tensor tensor(2, 3, 5, 7); + + using NormalIndex = DSizes; + using CustomIndex = Matrix; + + tensor.setRandom(); + + CustomIndex coeffC(1,2,4,1); + NormalIndex coeff(1,2,4,1); + + VERIFY_IS_EQUAL(tensor.coeff( coeffC ), tensor.coeff( coeff )); + VERIFY_IS_EQUAL(tensor.coeffRef( coeffC ), tensor.coeffRef( coeff )); + +} + + +void test_cxx11_tensor_customIndex() { + CALL_SUBTEST(test_customIndex()); + CALL_SUBTEST(test_customIndex()); +} From 7b34834f64aa03731dfb5bb01efc005820753932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20N=C3=BCtzi?= Date: Fri, 9 Oct 2015 19:08:14 +0200 Subject: [PATCH 101/344] name changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit user: Gabriel Nützi branch 'default' changed unsupported/Eigen/CXX11/src/Tensor/Tensor.h --- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 56 ++++++++++----------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index f9d367e0e..2b892571e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -88,9 +88,9 @@ class Tensor : public TensorBase m_storage; - template + template struct isOfNormalIndex{ - static const bool value = internal::is_base_of< array, CustomIndex >::value; + static const bool value = internal::is_base_of< array, CustomIndices >::value; }; public: @@ -121,20 +121,20 @@ class Tensor : public TensorBase& indices) const { eigen_internal_assert(checkIndexRange(indices)); return m_storage.data()[linearizedIndex(indices)]; } - /** Custom Index */ - template::value) ) + // custom indices + template::value) ) > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const CustomIndex & indices) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const 
CustomIndices & indices) const { - return coeff(internal::customIndex2Array(indices)); + return coeff(internal::CustomIndices2Array(indices)); } @@ -155,20 +155,20 @@ class Tensor : public TensorBase& indices) { eigen_internal_assert(checkIndexRange(indices)); return m_storage.data()[linearizedIndex(indices)]; } - /** Custom Index */ - template::value) ) + // custom indices + template::value) ) > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const CustomIndex & indices) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const CustomIndices & indices) { - return coeffRef(internal::customIndex2Array(indices)); + return coeffRef(internal::CustomIndices2Array(indices)); } @@ -209,17 +209,17 @@ class Tensor : public TensorBase::value) ) + // custom indices + template::value) ) > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const CustomIndex & indices) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const CustomIndices & indices) const { //eigen_assert(checkIndexRange(indices)); /* already in coeff */ - return coeff(internal::customIndex2Array(indices)); + return coeff(internal::CustomIndices2Array(indices)); } - /** Normal Index */ + // normal indices EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const { //eigen_assert(checkIndexRange(indices)); /* already in coeff */ @@ -270,21 +270,21 @@ class Tensor : public TensorBase& indices) { //eigen_assert(checkIndexRange(indices)); /* already in coeff */ return coeffRef(indices); } - /** Custom Index */ - template::value) ) + // custom indices + template::value) ) > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const CustomIndex & indices) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const CustomIndices & indices) { //eigen_assert(checkIndexRange(indices)); /* already in coeff */ - return coeffRef(internal::customIndex2Array(indices)); + return coeffRef(internal::CustomIndices2Array(indices)); } 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) @@ -359,7 +359,7 @@ class Tensor : public TensorBase::value) ) > - inline explicit Tensor(const CustomDimension & dimensions) : Tensor(internal::customIndex2Array(dimensions)) + inline explicit Tensor(const CustomDimension & dimensions) : Tensor(internal::CustomIndices2Array(dimensions)) {} template @@ -446,7 +446,7 @@ class Tensor : public TensorBase(dimensions)); + return coeffRef(internal::CustomIndices2Array(dimensions)); } From fc7478c04d16b8585d6eaff1ae2f473e247aaf2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20N=C3=BCtzi?= Date: Fri, 9 Oct 2015 19:10:08 +0200 Subject: [PATCH 102/344] =?UTF-8?q?name=20changes=202=20user:=20Gabriel=20?= =?UTF-8?q?N=C3=BCtzi=20=20branch=20'default'=20changed=20?= =?UTF-8?q?unsupported/Eigen/CXX11/src/Tensor/Tensor.h=20changed=20unsuppo?= =?UTF-8?q?rted/Eigen/CXX11/src/Tensor/TensorMeta.h?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 12 ++++++------ unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 2b892571e..d59fd21dc 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -134,7 +134,7 @@ class Tensor : public TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const CustomIndices & indices) const { - return coeff(internal::CustomIndices2Array(indices)); + return coeff(internal::customIndices2Array(indices)); } @@ -168,7 +168,7 @@ class Tensor : public TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const CustomIndices & indices) { - return coeffRef(internal::CustomIndices2Array(indices)); + return coeffRef(internal::customIndices2Array(indices)); } @@ -216,7 +216,7 @@ class Tensor : public 
TensorBase(indices)); + return coeff(internal::customIndices2Array(indices)); } // normal indices @@ -284,7 +284,7 @@ class Tensor : public TensorBase(indices)); + return coeffRef(internal::customIndices2Array(indices)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) @@ -359,7 +359,7 @@ class Tensor : public TensorBase::value) ) > - inline explicit Tensor(const CustomDimension & dimensions) : Tensor(internal::CustomIndices2Array(dimensions)) + inline explicit Tensor(const CustomDimension & dimensions) : Tensor(internal::customIndices2Array(dimensions)) {} template @@ -446,7 +446,7 @@ class Tensor : public TensorBase(dimensions)); + return coeffRef(internal::customIndices2Array(dimensions)); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 60a6c1469..01aedd3c9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -93,15 +93,15 @@ namespace internal{ template EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - array customIndex2Array(const IndexType & idx, numeric_list) { + array customIndices2Array(const IndexType & idx, numeric_list) { return { idx(Is)... }; } /** Make an array (for index/dimensions) out of a custom index */ template EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - array customIndex2Array(const IndexType & idx) { - return customIndex2Array(idx, typename gen_numeric_list::type{}); + array customIndices2Array(const IndexType & idx) { + return customIndices2Array(idx, typename gen_numeric_list::type{}); } From b4c79ee1d3d7b44e58f2bea48cd597aa0fa7e007 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 13 Oct 2015 11:30:41 +0200 Subject: [PATCH 103/344] Update custom setFromTripplets API to allow passing a functor object, and add a collapseDuplicates method to cleanup the API. 
Also add respective unit test --- Eigen/src/SparseCore/SparseMatrix.h | 35 ++++++++++++++++------------- test/sparse_basic.cpp | 22 ++++++++++++++---- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 22a6bd803..5e2b14554 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -437,11 +437,13 @@ class SparseMatrix template void setFromTriplets(const InputIterators& begin, const InputIterators& end); - template - void setFromTriplets(const InputIterators& begin, const InputIterators& end); + template + void setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func); + + void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } template - void sumupDuplicates(); + void collapseDuplicates(DupFunctor dup_func = DupFunctor()); //--- @@ -894,9 +896,8 @@ private: namespace internal { template -void set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, int Options = 0) +void set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, DupFunctor dup_func) { - EIGEN_UNUSED_VARIABLE(Options); enum { IsRowMajor = SparseMatrixType::IsRowMajor }; typedef typename SparseMatrixType::Scalar Scalar; typedef typename SparseMatrixType::StorageIndex StorageIndex; @@ -919,7 +920,7 @@ void set_from_triplets(const InputIterator& begin, const InputIterator& end, Spa trMat.insertBackUncompressed(it->row(),it->col()) = it->value(); // pass 3: - trMat.template sumupDuplicates(); + trMat.collapseDuplicates(dup_func); } // pass 4: transposed copy -> implicit sorting @@ -970,25 +971,29 @@ template template void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) { - internal::set_from_triplets, internal::scalar_sum_op >(begin, end, *this); + internal::set_from_triplets >(begin, end, *this, 
internal::scalar_sum_op()); } -/** The same as setFromTriplets but when duplicates are met the functor \a DupFunctor is applied: +/** The same as setFromTriplets but when duplicates are met the functor \a dup_func is applied: * \code - * value = DupFunctor()(OldValue, NewValue) + * value = dup_func(OldValue, NewValue) * \endcode - */ + * Here is a C++11 example keeping the latest entry only: + * \code + * mat.setFromTriplets(triplets.begin(), triplets.end(), [] (const Scalar&,const Scalar &b) { return b; }); + * \endcode + */ template -template -void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) +template +void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) { - internal::set_from_triplets, DupFunctor>(begin, end, *this); + internal::set_from_triplets, DupFunctor>(begin, end, *this, dup_func); } /** \internal */ template template -void SparseMatrix::sumupDuplicates() +void SparseMatrix::collapseDuplicates(DupFunctor dup_func) { eigen_assert(!isCompressed()); // TODO, in practice we should be able to use m_innerNonZeros for that task @@ -1006,7 +1011,7 @@ void SparseMatrix::sumupDuplicates() if(wi(i)>=start) { // we already meet this entry => accumulate it - m_data.value(wi(i)) = DupFunctor()(m_data.value(wi(i)), m_data.value(k)); + m_data.value(wi(i)) = dup_func(m_data.value(wi(i)), m_data.value(k)); } else { diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 95bbfab0e..993f7840c 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -258,19 +258,33 @@ template void sparse_basic(const SparseMatrixType& re std::vector triplets; Index ntriplets = rows*cols; triplets.reserve(ntriplets); - DenseMatrix refMat(rows,cols); - refMat.setZero(); + DenseMatrix refMat_sum = DenseMatrix::Zero(rows,cols); + DenseMatrix refMat_prod = DenseMatrix::Zero(rows,cols); + DenseMatrix refMat_last = DenseMatrix::Zero(rows,cols); + for(Index 
i=0;i(0,StorageIndex(rows-1)); StorageIndex c = internal::random(0,StorageIndex(cols-1)); Scalar v = internal::random(); triplets.push_back(TripletType(r,c,v)); - refMat(r,c) += v; + refMat_sum(r,c) += v; + if(std::abs(refMat_prod(r,c))==0) + refMat_prod(r,c) = v; + else + refMat_prod(r,c) *= v; + refMat_last(r,c) = v; } SparseMatrixType m(rows,cols); m.setFromTriplets(triplets.begin(), triplets.end()); - VERIFY_IS_APPROX(m, refMat); + VERIFY_IS_APPROX(m, refMat_sum); + + m.setFromTriplets(triplets.begin(), triplets.end(), std::multiplies()); + VERIFY_IS_APPROX(m, refMat_prod); +#if (defined(__cplusplus) && __cplusplus >= 201103L) + m.setFromTriplets(triplets.begin(), triplets.end(), [] (Scalar,Scalar b) { return b; }); + VERIFY_IS_APPROX(m, refMat_last); +#endif } // test Map From 2598f3987edc704d9c95e1c207d3533e26314ca0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 14 Oct 2015 10:12:58 +0200 Subject: [PATCH 104/344] Add a plain_object_eval<> helper returning a plain object type based on evaluator's Flags, and base nested_eval on it. 
--- Eigen/src/Core/util/XprHelper.h | 23 +++++++++++++------ .../ConservativeSparseSparseProduct.h | 2 +- Eigen/src/SparseCore/SparseUtil.h | 21 ++++++++++------- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index c85a6be80..624d8a83b 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -233,33 +233,33 @@ template struct size_of_xpr_at_compile_time */ template::StorageKind> struct plain_matrix_type; -template struct plain_matrix_type_dense; +template struct plain_matrix_type_dense; template struct plain_matrix_type { - typedef typename plain_matrix_type_dense::XprKind>::type type; + typedef typename plain_matrix_type_dense::XprKind, traits::Flags>::type type; }; template struct plain_matrix_type { typedef typename T::PlainObject type; }; -template struct plain_matrix_type_dense +template struct plain_matrix_type_dense { typedef Matrix::Scalar, traits::RowsAtCompileTime, traits::ColsAtCompileTime, - AutoAlign | (traits::Flags&RowMajorBit ? RowMajor : ColMajor), + AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor), traits::MaxRowsAtCompileTime, traits::MaxColsAtCompileTime > type; }; -template struct plain_matrix_type_dense +template struct plain_matrix_type_dense { typedef Array::Scalar, traits::RowsAtCompileTime, traits::ColsAtCompileTime, - AutoAlign | (traits::Flags&RowMajorBit ? RowMajor : ColMajor), + AutoAlign | (Flags&RowMajorBit ? 
RowMajor : ColMajor), traits::MaxRowsAtCompileTime, traits::MaxColsAtCompileTime > type; @@ -303,6 +303,15 @@ struct eval, Dense> }; +/* similar to plain_matrix_type, but using the evaluator's Flags */ +template::StorageKind> struct plain_object_eval; + +template +struct plain_object_eval +{ + typedef typename plain_matrix_type_dense::XprKind, evaluator::Flags>::type type; +}; + /* plain_matrix_type_column_major : same as plain_matrix_type but guaranteed to be column-major */ @@ -385,7 +394,7 @@ struct transfer_constness * \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression. * \param PlainObject the type of the temporary if needed. */ -template::type> struct nested_eval +template::type> struct nested_eval { enum { // For the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values. diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 6e664515d..a61ceb7cc 100644 --- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -138,7 +138,7 @@ struct conservative_sparse_sparse_product_selector RowMajorMatrix; typedef SparseMatrix ColMajorMatrixAux; - typedef typename sparse_eval::type ColMajorMatrix; + typedef typename sparse_eval::type ColMajorMatrix; // If the result is tall and thin (in the extreme case a column vector) // then it is faster to sort the coefficients inplace instead of transposing twice. 
diff --git a/Eigen/src/SparseCore/SparseUtil.h b/Eigen/src/SparseCore/SparseUtil.h index 75c604f79..62f0f6864 100644 --- a/Eigen/src/SparseCore/SparseUtil.h +++ b/Eigen/src/SparseCore/SparseUtil.h @@ -74,20 +74,20 @@ template class SparseSymmetricPermutationProduct; namespace internal { -template struct sparse_eval; +template struct sparse_eval; template struct eval - : public sparse_eval::RowsAtCompileTime,traits::ColsAtCompileTime> + : sparse_eval::RowsAtCompileTime,traits::ColsAtCompileTime,traits::Flags> {}; -template struct sparse_eval { +template struct sparse_eval { typedef typename traits::Scalar _Scalar; typedef typename traits::StorageIndex _StorageIndex; public: typedef SparseVector<_Scalar, RowMajor, _StorageIndex> type; }; -template struct sparse_eval { +template struct sparse_eval { typedef typename traits::Scalar _Scalar; typedef typename traits::StorageIndex _StorageIndex; public: @@ -95,15 +95,15 @@ template struct sparse_eval { }; // TODO this seems almost identical to plain_matrix_type -template struct sparse_eval { +template struct sparse_eval { typedef typename traits::Scalar _Scalar; typedef typename traits::StorageIndex _StorageIndex; - enum { _Options = ((traits::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor }; + enum { _Options = ((Flags&RowMajorBit)==RowMajorBit) ? 
RowMajor : ColMajor }; public: typedef SparseMatrix<_Scalar, _Options, _StorageIndex> type; }; -template struct sparse_eval { +template struct sparse_eval { typedef typename traits::Scalar _Scalar; public: typedef Matrix<_Scalar, 1, 1> type; @@ -118,10 +118,15 @@ template struct plain_matrix_type typedef SparseMatrix<_Scalar, _Options, _StorageIndex> type; }; +template +struct plain_object_eval + : sparse_eval::RowsAtCompileTime,traits::ColsAtCompileTime, evaluator::Flags> +{}; + template struct solve_traits { - typedef typename sparse_eval::type PlainObject; + typedef typename sparse_eval::Flags>::type PlainObject; }; template From 527fc4bc86380eee9e9d77fd1890556da8070fc3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 14 Oct 2015 10:14:47 +0200 Subject: [PATCH 105/344] Fix ambiguous instantiation issues of product_evaluator. --- Eigen/src/Core/ProductEvaluators.h | 15 +++++++-------- Eigen/src/Core/util/ForwardDeclarations.h | 3 +-- Eigen/src/SparseCore/SparseDenseProduct.h | 4 ++-- Eigen/src/SparseCore/SparseDiagonalProduct.h | 4 ++-- Eigen/src/SparseCore/SparseSelfAdjointView.h | 4 ++-- 5 files changed, 14 insertions(+), 16 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index eebd1a9d4..04dc08957 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -97,8 +97,7 @@ struct evaluator_traits > // This is the default evaluator implementation for products: // It creates a temporary and call generic_product_impl template -struct product_evaluator, ProductTag, LhsShape, RhsShape, typename traits::Scalar, typename traits::Scalar, - EnableIf<(Options==DefaultProduct || Options==AliasFreeProduct)> > +struct product_evaluator, ProductTag, LhsShape, RhsShape> : public evaluator::PlainObject> { typedef Product XprType; @@ -407,7 +406,7 @@ template -struct product_evaluator, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > +struct 
product_evaluator, ProductTag, DenseShape, DenseShape> : evaluator_base > { typedef Product XprType; @@ -540,12 +539,12 @@ protected: }; template -struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar > - : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar > +struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape> + : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape> { typedef Product XprType; typedef Product BaseProduct; - typedef product_evaluator Base; + typedef product_evaluator Base; enum { Flags = Base::Flags | EvalBeforeNestingBit }; @@ -769,7 +768,7 @@ protected: // diagonal * dense template -struct product_evaluator, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> +struct product_evaluator, ProductTag, DiagonalShape, DenseShape> : diagonal_product_evaluator_base, OnTheLeft> { typedef diagonal_product_evaluator_base, OnTheLeft> Base; @@ -815,7 +814,7 @@ struct product_evaluator, ProductTag, DiagonalSha // dense * diagonal template -struct product_evaluator, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar> +struct product_evaluator, ProductTag, DenseShape, DiagonalShape> : diagonal_product_evaluator_base, OnTheRight> { typedef diagonal_product_evaluator_base, OnTheRight> Base; diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 7c20fed5e..c2d5f4316 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -160,8 +160,7 @@ template< typename T, typename LhsShape = typename evaluator_traits::Shape, typename RhsShape = typename evaluator_traits::Shape, typename LhsScalar = typename traits::Scalar, - typename RhsScalar = typename traits::Scalar, - typename = EnableIf // extra template parameter for SFINAE-based 
specialization + typename RhsScalar = typename traits::Scalar > struct product_evaluator; } diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 3710e05a6..2e34ae74c 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -278,7 +278,7 @@ protected: // sparse * dense outer product template -struct product_evaluator, OuterProduct, SparseShape, DenseShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, OuterProduct, SparseShape, DenseShape> : sparse_dense_outer_product_evaluator { typedef sparse_dense_outer_product_evaluator Base; @@ -293,7 +293,7 @@ struct product_evaluator, OuterProduct, Sparse }; template -struct product_evaluator, OuterProduct, DenseShape, SparseShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, OuterProduct, DenseShape, SparseShape> : sparse_dense_outer_product_evaluator { typedef sparse_dense_outer_product_evaluator Base; diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h index 42e29cf70..cf31e5a53 100644 --- a/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -35,7 +35,7 @@ template struct sparse_diagonal_product_evaluator; template -struct product_evaluator, ProductTag, DiagonalShape, SparseShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, ProductTag, DiagonalShape, SparseShape> : public sparse_diagonal_product_evaluator { typedef Product XprType; @@ -46,7 +46,7 @@ struct product_evaluator, ProductTag, Diagonal }; template -struct product_evaluator, ProductTag, SparseShape, DiagonalShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, ProductTag, SparseShape, DiagonalShape> : public sparse_diagonal_product_evaluator, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> { typedef Product XprType; diff --git 
a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index 97e7293c7..46c6ce1d3 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -336,7 +336,7 @@ struct generic_product_impl -struct product_evaluator, ProductTag, SparseSelfAdjointShape, SparseShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, ProductTag, SparseSelfAdjointShape, SparseShape> : public evaluator::PlainObject> { typedef Product XprType; @@ -356,7 +356,7 @@ protected: }; template -struct product_evaluator, ProductTag, SparseShape, SparseSelfAdjointShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, ProductTag, SparseShape, SparseSelfAdjointShape> : public evaluator::PlainObject> { typedef Product XprType; From c0adf6e38d9f9d3a8f8e5c1ff4c2c3939cf0e070 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 14 Oct 2015 10:16:48 +0200 Subject: [PATCH 106/344] Fix perm*sparse return type and nesting, and add several sanity checks for perm*sparse --- Eigen/src/SparseCore/SparseMatrix.h | 3 + Eigen/src/SparseCore/SparsePermutation.h | 63 ++++++++++------- test/sparse_permutations.cpp | 87 +++++++++++++++++++----- 3 files changed, 109 insertions(+), 44 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 5e2b14554..f4d0a28dc 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -1045,6 +1045,9 @@ EIGEN_DONT_INLINE SparseMatrix& SparseMatrix::Flags & RowMajorBit); if (needToTranspose) { + #ifdef EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN + EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN + #endif // two passes algorithm: // 1 - compute the number of coeffs per dest inner vector // 2 - do the actual copy/eval diff --git a/Eigen/src/SparseCore/SparsePermutation.h b/Eigen/src/SparseCore/SparsePermutation.h index 3c58e3b4f..ef38357ae 100644 --- 
a/Eigen/src/SparseCore/SparsePermutation.h +++ b/Eigen/src/SparseCore/SparsePermutation.h @@ -16,15 +16,17 @@ namespace Eigen { namespace internal { -template -struct permutation_matrix_product +template +struct permutation_matrix_product { - typedef typename remove_all::type MatrixTypeNestedCleaned; - typedef typename MatrixTypeNestedCleaned::Scalar Scalar; - typedef typename MatrixTypeNestedCleaned::StorageIndex StorageIndex; + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + typedef typename MatrixTypeCleaned::Scalar Scalar; + typedef typename MatrixTypeCleaned::StorageIndex StorageIndex; enum { - SrcStorageOrder = MatrixTypeNestedCleaned::Flags&RowMajorBit ? RowMajor : ColMajor, + SrcStorageOrder = MatrixTypeCleaned::Flags&RowMajorBit ? RowMajor : ColMajor, MoveOuter = SrcStorageOrder==RowMajor ? Side==OnTheLeft : Side==OnTheRight }; @@ -33,8 +35,9 @@ struct permutation_matrix_product SparseMatrix >::type ReturnType; template - static inline void run(Dest& dst, const PermutationType& perm, const MatrixType& mat) + static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) { + MatrixType mat(xpr); if(MoveOuter) { SparseMatrix tmp(mat.rows(), mat.cols()); @@ -50,7 +53,7 @@ struct permutation_matrix_product Index jp = perm.indices().coeff(j); Index jsrc = ((Side==OnTheRight) ^ Transposed) ? jp : j; Index jdst = ((Side==OnTheLeft) ^ Transposed) ? 
jp : j; - for(typename MatrixTypeNestedCleaned::InnerIterator it(mat,jsrc); it; ++it) + for(typename MatrixTypeCleaned::InnerIterator it(mat,jsrc); it; ++it) tmp.insertByOuterInner(jdst,it.index()) = it.value(); } dst = tmp; @@ -67,11 +70,11 @@ struct permutation_matrix_product perm_cpy = perm.transpose(); for(Index j=0; j struct product_promote_storage_type >::PlainObject template -struct product_evaluator, ProductTag, PermutationShape, SparseShape, typename traits::Scalar, typename traits::Scalar> - : public evaluator::ReturnType> +struct product_evaluator, ProductTag, PermutationShape, SparseShape> + : public evaluator::ReturnType> { typedef Product XprType; - typedef typename permutation_matrix_product::ReturnType PlainObject; + typedef typename permutation_matrix_product::ReturnType PlainObject; typedef evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + explicit product_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast(this)) Base(m_result); generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } - -protected: + +protected: PlainObject m_result; }; template -struct product_evaluator, ProductTag, SparseShape, PermutationShape, typename traits::Scalar, typename traits::Scalar> +struct product_evaluator, ProductTag, SparseShape, PermutationShape > : public evaluator::ReturnType> { typedef Product XprType; typedef typename permutation_matrix_product::ReturnType PlainObject; typedef evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + explicit product_evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast(this)) Base(m_result); generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } - -protected: + +protected: PlainObject m_result; }; @@ -132,34 +143,34 @@ protected: /** \returns the matrix with the permutation applied to the columns */ template -inline const Product +inline const Product operator*(const SparseMatrixBase& matrix, 
const PermutationBase& perm) -{ return Product(matrix.derived(), perm.derived()); } +{ return Product(matrix.derived(), perm.derived()); } /** \returns the matrix with the permutation applied to the rows */ template -inline const Product +inline const Product operator*( const PermutationBase& perm, const SparseMatrixBase& matrix) -{ return Product(perm.derived(), matrix.derived()); } +{ return Product(perm.derived(), matrix.derived()); } /** \returns the matrix with the inverse permutation applied to the columns. */ template -inline const Product > +inline const Product, AliasFreeProduct> operator*(const SparseMatrixBase& matrix, const InverseImpl& tperm) { - return Product >(matrix.derived(), tperm.derived()); + return Product, AliasFreeProduct>(matrix.derived(), tperm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. */ template -inline const Product, SparseDerived> +inline const Product, SparseDerived, AliasFreeProduct> operator*(const InverseImpl& tperm, const SparseMatrixBase& matrix) { - return Product, SparseDerived>(tperm.derived(), matrix.derived()); + return Product, SparseDerived, AliasFreeProduct>(tperm.derived(), matrix.derived()); } } // end namespace Eigen diff --git a/test/sparse_permutations.cpp b/test/sparse_permutations.cpp index dec586776..c2e1d84a3 100644 --- a/test/sparse_permutations.cpp +++ b/test/sparse_permutations.cpp @@ -1,14 +1,46 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2011-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +static long int nb_transposed_copies; +#define EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN {nb_transposed_copies++;} +#define VERIFY_TRANSPOSITION_COUNT(XPR,N) {\ + nb_transposed_copies = 0; \ + XPR; \ + if(nb_transposed_copies!=N) std::cerr << "nb_transposed_copies == " << nb_transposed_copies << "\n"; \ + VERIFY( (#XPR) && nb_transposed_copies==N ); \ + } + #include "sparse.h" +template +bool is_sorted(const T& mat) { + for(Index k = 0; k=it.index()) + return false; + prev = it.index(); + } + } + return true; +} + +template +typename internal::nested_eval::type eval(const T &xpr) +{ + VERIFY( int(internal::nested_eval::type::Flags&RowMajorBit) == int(internal::evaluator::Flags&RowMajorBit) ); + return xpr; +} + template void sparse_permutations(const SparseMatrixType& ref) { const Index rows = ref.rows(); @@ -18,6 +50,8 @@ template void sparse_permutations(c typedef SparseMatrix OtherSparseMatrixType; typedef Matrix DenseMatrix; typedef Matrix VectorI; +// bool IsRowMajor1 = SparseMatrixType::IsRowMajor; +// bool IsRowMajor2 = OtherSparseMatrixType::IsRowMajor; double density = (std::max)(8./(rows*cols), 0.01); @@ -42,58 +76,69 @@ template void sparse_permutations(c randomPermutationVector(pi, cols); p.indices() = pi; - res = mat*p; + VERIFY( is_sorted( eval(mat*p) )); + VERIFY( is_sorted( res = mat*p )); + VERIFY_TRANSPOSITION_COUNT( eval(mat*p), 0); + //VERIFY_TRANSPOSITION_COUNT( res = mat*p, IsRowMajor ? 
1 : 0 ); res_d = mat_d*p; VERIFY(res.isApprox(res_d) && "mat*p"); - res = p*mat; + VERIFY( is_sorted( eval(p*mat) )); + VERIFY( is_sorted( res = p*mat )); + VERIFY_TRANSPOSITION_COUNT( eval(p*mat), 0); res_d = p*mat_d; VERIFY(res.isApprox(res_d) && "p*mat"); - res = mat*p.inverse(); + VERIFY( is_sorted( (mat*p).eval() )); + VERIFY( is_sorted( res = mat*p.inverse() )); + VERIFY_TRANSPOSITION_COUNT( eval(mat*p.inverse()), 0); res_d = mat*p.inverse(); VERIFY(res.isApprox(res_d) && "mat*inv(p)"); - res = p.inverse()*mat; + VERIFY( is_sorted( (p*mat+p*mat).eval() )); + VERIFY( is_sorted( res = p.inverse()*mat )); + VERIFY_TRANSPOSITION_COUNT( eval(p.inverse()*mat), 0); res_d = p.inverse()*mat_d; VERIFY(res.isApprox(res_d) && "inv(p)*mat"); - res = mat.twistedBy(p); + VERIFY( is_sorted( (p * mat * p.inverse()).eval() )); + VERIFY( is_sorted( res = mat.twistedBy(p) )); + VERIFY_TRANSPOSITION_COUNT( eval(p * mat * p.inverse()), 0); res_d = (p * mat_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "p*mat*inv(p)"); - res = mat.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p_null) )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint upper to full"); - res = mat.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p_null) )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint lower to full"); - res = up.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = up.template selfadjointView().twistedBy(p_null) )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "upper selfadjoint to full"); - res = lo.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = lo.template selfadjointView().twistedBy(p_null) )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "lower selfadjoint full"); - res = mat.template selfadjointView(); + VERIFY( is_sorted( res = mat.template selfadjointView() )); res_d = 
up_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint upper to full"); - res = mat.template selfadjointView(); + VERIFY( is_sorted( res = mat.template selfadjointView() )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint lower to full"); - res = up.template selfadjointView(); + VERIFY( is_sorted( res = up.template selfadjointView() )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "upper selfadjoint to full"); - res = lo.template selfadjointView(); + VERIFY( is_sorted( res = lo.template selfadjointView() )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "lower selfadjoint full"); @@ -150,19 +195,19 @@ template void sparse_permutations(c VERIFY(res.isApprox(res_d) && "upper selfadjoint twisted to lower"); - res = mat.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p) )); res_d = (p * up_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "full selfadjoint upper twisted to full"); - res = mat.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p) )); res_d = (p * lo_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "full selfadjoint lower twisted to full"); - res = up.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = up.template selfadjointView().twistedBy(p) )); res_d = (p * up_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "upper selfadjoint twisted to full"); - res = lo.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = lo.template selfadjointView().twistedBy(p) )); res_d = (p * lo_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "lower selfadjoint twisted to full"); } @@ -182,4 +227,10 @@ void test_sparse_permutations() CALL_SUBTEST_1(( sparse_permutations_all(s) )); CALL_SUBTEST_2(( sparse_permutations_all >(s) )); } + + VERIFY((internal::is_same,OnTheRight,false,SparseShape>::ReturnType, + typename internal::nested_eval,PermutationMatrix,AliasFreeProduct>,1>::type>::value)); 
+ + VERIFY((internal::is_same,OnTheLeft,false,SparseShape>::ReturnType, + typename internal::nested_eval,SparseMatrix,AliasFreeProduct>,1>::type>::value)); } From 6585efc55354b38c65de8c23599e99f3caaca843 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 14 Oct 2015 09:31:37 -0700 Subject: [PATCH 107/344] Tightened the definition of isOfNormalIndex to take into account integer types in addition to arrays of indices Only compile the custom index code when EIGEN_HAS_SFINAE is defined. For the time being, EIGEN_HAS_SFINAE is a synonym for EIGEN_HAS_VARIADIC_TEMPLATES, but this might evolve in the future. Moved some code around. --- unsupported/Eigen/CXX11/Tensor | 2 +- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 33 +++++++++---------- .../{TensorMetaMacros.h => TensorMacros.h} | 11 +++++++ .../Eigen/CXX11/src/Tensor/TensorMeta.h | 20 ++--------- unsupported/test/CMakeLists.txt | 2 +- ...ndex.cpp => cxx11_tensor_custom_index.cpp} | 25 +++++++------- 6 files changed, 41 insertions(+), 52 deletions(-) rename unsupported/Eigen/CXX11/src/Tensor/{TensorMetaMacros.h => TensorMacros.h} (84%) rename unsupported/test/{cxx11_tensor_customIndex.cpp => cxx11_tensor_custom_index.cpp} (64%) diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index ee3a1cdb6..3331ccb55 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -57,9 +57,9 @@ #endif +#include "src/Tensor/TensorMacros.h" #include "src/Tensor/TensorForwardDeclarations.h" #include "src/Tensor/TensorMeta.h" -#include "src/Tensor/TensorMetaMacros.h" #include "src/Tensor/TensorDeviceType.h" #include "src/Tensor/TensorIndexList.h" #include "src/Tensor/TensorDimensionList.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index d59fd21dc..57d44baf9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -88,10 +88,14 @@ class Tensor : public TensorBase
m_storage; +#ifdef EIGEN_HAS_SFINAE template struct isOfNormalIndex{ - static const bool value = internal::is_base_of< array, CustomIndices >::value; + static const bool is_array = internal::is_base_of, CustomIndices >::value; + static const bool is_int = NumTraits::IsInteger; + static const bool value = is_array | is_int; }; +#endif public: // Metadata @@ -129,6 +133,7 @@ class Tensor : public TensorBase::value) ) > @@ -136,8 +141,7 @@ class Tensor : public TensorBase(indices)); } - - +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const { @@ -163,6 +167,7 @@ class Tensor : public TensorBase::value) ) > @@ -170,7 +175,7 @@ class Tensor : public TensorBase(indices)); } - +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { @@ -210,19 +215,19 @@ class Tensor : public TensorBase::value) ) > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const CustomIndices & indices) const { - //eigen_assert(checkIndexRange(indices)); /* already in coeff */ return coeff(internal::customIndices2Array(indices)); } +#endif // normal indices EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const { - //eigen_assert(checkIndexRange(indices)); /* already in coeff */ return coeff(indices); } @@ -273,19 +278,19 @@ class Tensor : public TensorBase& indices) { - //eigen_assert(checkIndexRange(indices)); /* already in coeff */ return coeffRef(indices); } // custom indices +#ifdef EIGEN_HAS_SFINAE template::value) ) > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const CustomIndices & indices) { - //eigen_assert(checkIndexRange(indices)); /* already in coeff */ return coeffRef(internal::customIndices2Array(indices)); } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) { @@ -355,13 +360,6 @@ class Tensor : public TensorBase::value) ) - > - inline explicit Tensor(const CustomDimension & dimensions) : Tensor(internal::customIndices2Array(dimensions)) - {} 
- template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(const TensorBase& other) @@ -429,7 +427,6 @@ class Tensor : public TensorBase& dimensions) { array dims; @@ -440,15 +437,15 @@ class Tensor : public TensorBase::value) ) > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(const CustomDimension & dimensions) { - //eigen_assert(checkIndexRange(indices)); /* already in coeff */ return coeffRef(internal::customIndices2Array(dimensions)); } - +#endif #ifndef EIGEN_EMULATE_CXX11_META_H template diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMetaMacros.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h similarity index 84% rename from unsupported/Eigen/CXX11/src/Tensor/TensorMetaMacros.h rename to unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h index 8cb46e703..6d9cc4f38 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMetaMacros.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h @@ -26,8 +26,19 @@ * void foo(){} */ +#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#define EIGEN_HAS_SFINAE +#endif + #define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \ typename internal::enable_if< ( __condition__ ) , int >::type = 0 +#if defined(EIGEN_HAS_CONSTEXPR) +#define EIGEN_CONSTEXPR constexpr +#else +#define EIGEN_CONSTEXPR +#endif + + #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 01aedd3c9..d1efc1a87 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -32,17 +32,6 @@ template <> struct max_n_1<0> { }; - - - - - -#if defined(EIGEN_HAS_CONSTEXPR) -#define EIGEN_CONSTEXPR constexpr -#else -#define EIGEN_CONSTEXPR -#endif - // Tuple mimics std::pair but works on e.g. nvcc. 
template struct Tuple { public: @@ -88,7 +77,7 @@ bool operator!=(const Tuple& x, const Tuple& y) { - +#ifdef EIGEN_HAS_SFINAE namespace internal{ template @@ -127,15 +116,10 @@ namespace internal{ }; } +#endif - - - - -#undef EIGEN_CONSTEXPR - } // namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_META_H diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index e9656f404..8865892e6 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -142,7 +142,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_io "-std=c++0x") ei_add_test(cxx11_tensor_generator "-std=c++0x") ei_add_test(cxx11_tensor_custom_op "-std=c++0x") - ei_add_test(cxx11_tensor_customIndex "-std=c++0x") + ei_add_test(cxx11_tensor_custom_index "-std=c++0x") # These tests needs nvcc # ei_add_test(cxx11_tensor_device "-std=c++0x") diff --git a/unsupported/test/cxx11_tensor_customIndex.cpp b/unsupported/test/cxx11_tensor_custom_index.cpp similarity index 64% rename from unsupported/test/cxx11_tensor_customIndex.cpp rename to unsupported/test/cxx11_tensor_custom_index.cpp index 6ec809890..ff9545a7a 100644 --- a/unsupported/test/cxx11_tensor_customIndex.cpp +++ b/unsupported/test/cxx11_tensor_custom_index.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2015 Benoit Steiner // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -17,25 +17,22 @@ using Eigen::Tensor; template -static void test_customIndex() { - +static void test_custom_index() { + Tensor tensor(2, 3, 5, 7); - - using NormalIndex = DSizes; - using CustomIndex = Matrix; - tensor.setRandom(); + using NormalIndex = DSizes; + using CustomIndex = Matrix; CustomIndex coeffC(1,2,4,1); NormalIndex coeff(1,2,4,1); - - VERIFY_IS_EQUAL(tensor.coeff( coeffC ), tensor.coeff( coeff )); - VERIFY_IS_EQUAL(tensor.coeffRef( coeffC ), tensor.coeffRef( coeff )); - + + VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff)); } -void test_cxx11_tensor_customIndex() { - CALL_SUBTEST(test_customIndex()); - CALL_SUBTEST(test_customIndex()); +void test_cxx11_tensor_custom_index() { + test_custom_index(); + test_custom_index(); } From de1e9f29f4db2c837ffb354c90f9e9fb7df05e85 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 15 Oct 2015 14:58:49 -0700 Subject: [PATCH 108/344] Updated the custom indexing code: we can now use any container that provides the [] operator to index a tensor. 
Added unit tests to validate the use of std::map and a few more types as valid custom index containers --- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 17 ++--- .../Eigen/CXX11/src/Tensor/TensorMeta.h | 8 +-- .../test/cxx11_tensor_custom_index.cpp | 72 +++++++++++++++++-- unsupported/test/cxx11_tensor_simple.cpp | 4 -- 4 files changed, 78 insertions(+), 23 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 57d44baf9..3ac465d24 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -91,7 +91,7 @@ class Tensor : public TensorBase struct isOfNormalIndex{ - static const bool is_array = internal::is_base_of, CustomIndices >::value; + static const bool is_array = internal::is_base_of, CustomIndices>::value; static const bool is_int = NumTraits::IsInteger; static const bool value = is_array | is_int; }; @@ -120,11 +120,8 @@ class Tensor : public TensorBase{{firstIndex, secondIndex, otherIndices...}}); } - - #endif - // normal indices EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array& indices) const { @@ -137,7 +134,7 @@ class Tensor : public TensorBase::value) ) > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const CustomIndices & indices) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(CustomIndices& indices) const { return coeff(internal::customIndices2Array(indices)); } @@ -171,7 +168,7 @@ class Tensor : public TensorBase::value) ) > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const CustomIndices & indices) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(CustomIndices& indices) { return coeffRef(internal::customIndices2Array(indices)); } @@ -219,7 +216,7 @@ class Tensor : public TensorBase::value) ) > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const CustomIndices & indices) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& 
operator()(CustomIndices& indices) const { return coeff(internal::customIndices2Array(indices)); } @@ -286,7 +283,7 @@ class Tensor : public TensorBase::value) ) > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const CustomIndices & indices) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(CustomIndices& indices) { return coeffRef(internal::customIndices2Array(indices)); } @@ -441,9 +438,9 @@ class Tensor : public TensorBase::value) ) > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(const CustomDimension & dimensions) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(CustomDimension& dimensions) { - return coeffRef(internal::customIndices2Array(dimensions)); + resize(internal::customIndices2Array(dimensions)); } #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index d1efc1a87..07735fa5f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -82,15 +82,15 @@ namespace internal{ template EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - array customIndices2Array(const IndexType & idx, numeric_list) { - return { idx(Is)... }; + array customIndices2Array(IndexType& idx, numeric_list) { + return { idx[Is]... 
}; } /** Make an array (for index/dimensions) out of a custom index */ template EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - array customIndices2Array(const IndexType & idx) { - return customIndices2Array(idx, typename gen_numeric_list::type{}); + array customIndices2Array(IndexType& idx) { + return customIndices2Array(idx, typename gen_numeric_list::type{}); } diff --git a/unsupported/test/cxx11_tensor_custom_index.cpp b/unsupported/test/cxx11_tensor_custom_index.cpp index ff9545a7a..4528cc176 100644 --- a/unsupported/test/cxx11_tensor_custom_index.cpp +++ b/unsupported/test/cxx11_tensor_custom_index.cpp @@ -9,6 +9,7 @@ #include "main.h" #include +#include #include #include @@ -17,22 +18,83 @@ using Eigen::Tensor; template -static void test_custom_index() { - +static void test_map_as_index() +{ +#ifdef EIGEN_HAS_SFINAE Tensor tensor(2, 3, 5, 7); tensor.setRandom(); using NormalIndex = DSizes; - using CustomIndex = Matrix; + using CustomIndex = std::map; + CustomIndex coeffC; + coeffC[0] = 1; + coeffC[1] = 2; + coeffC[2] = 4; + coeffC[3] = 1; + NormalIndex coeff(1,2,4,1); + + VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff)); +#endif +} + + +template +static void test_matrix_as_index() +{ +#ifdef EIGEN_HAS_SFINAE + Tensor tensor(2, 3, 5, 7); + tensor.setRandom(); + + using NormalIndex = DSizes; + using CustomIndex = Matrix; CustomIndex coeffC(1,2,4,1); NormalIndex coeff(1,2,4,1); VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff)); VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff)); +#endif +} + + +template +static void test_varlist_as_index() +{ +#ifdef EIGEN_HAS_SFINAE + Tensor tensor(2, 3, 5, 7); + tensor.setRandom(); + + DSizes coeff(1,2,4,1); + + VERIFY_IS_EQUAL(tensor.coeff({1,2,4,1}), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef({1,2,4,1}), tensor.coeffRef(coeff)); +#endif +} + + +template +static void test_sizes_as_index() +{ +#ifdef 
EIGEN_HAS_SFINAE + Tensor tensor(2, 3, 5, 7); + tensor.setRandom(); + + DSizes coeff(1,2,4,1); + Sizes<1,2,4,1> coeffC; + + VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff)); +#endif } void test_cxx11_tensor_custom_index() { - test_custom_index(); - test_custom_index(); + test_map_as_index(); + test_map_as_index(); + test_matrix_as_index(); + test_matrix_as_index(); + test_varlist_as_index(); + test_varlist_as_index(); + test_sizes_as_index(); + test_sizes_as_index(); } diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp index 8cd2ab7fd..0ce92eed9 100644 --- a/unsupported/test/cxx11_tensor_simple.cpp +++ b/unsupported/test/cxx11_tensor_simple.cpp @@ -293,7 +293,3 @@ void test_cxx11_tensor_simple() CALL_SUBTEST(test_simple_assign()); CALL_SUBTEST(test_resize()); } - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ From 1127ca85863786eb1011109101f581207adf8ede Mon Sep 17 00:00:00 2001 From: Abhijit Kundu Date: Fri, 16 Oct 2015 16:41:33 -0700 Subject: [PATCH 109/344] Added ArpackSupport to cmake install target --- unsupported/Eigen/src/CMakeLists.txt | 1 + unsupported/Eigen/src/Eigenvalues/CMakeLists.txt | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 unsupported/Eigen/src/Eigenvalues/CMakeLists.txt diff --git a/unsupported/Eigen/src/CMakeLists.txt b/unsupported/Eigen/src/CMakeLists.txt index 8eb2808e3..a7e8c7553 100644 --- a/unsupported/Eigen/src/CMakeLists.txt +++ b/unsupported/Eigen/src/CMakeLists.txt @@ -1,5 +1,6 @@ ADD_SUBDIRECTORY(AutoDiff) ADD_SUBDIRECTORY(BVH) +ADD_SUBDIRECTORY(Eigenvalues) ADD_SUBDIRECTORY(FFT) ADD_SUBDIRECTORY(IterativeSolvers) ADD_SUBDIRECTORY(LevenbergMarquardt) diff --git a/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt b/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt new file mode 100644 index 000000000..1d4387c82 --- /dev/null +++ 
b/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Eigenvalues_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Eigenvalues_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/Eigenvalues COMPONENT Devel + ) From 0ed41bdefab997e5863179454e585e29ba8d60a2 Mon Sep 17 00:00:00 2001 From: Abhijit Kundu Date: Fri, 16 Oct 2015 18:21:02 -0700 Subject: [PATCH 110/344] ArpackSupport was missing here also. --- unsupported/Eigen/CMakeLists.txt | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CMakeLists.txt b/unsupported/Eigen/CMakeLists.txt index 6faf4585d..6d0cf4f9d 100644 --- a/unsupported/Eigen/CMakeLists.txt +++ b/unsupported/Eigen/CMakeLists.txt @@ -1,7 +1,24 @@ -set(Eigen_HEADERS AdolcForward BVH IterativeSolvers MatrixFunctions MoreVectorization AutoDiff AlignedVector3 Polynomials - FFT NonLinearOptimization SparseExtra IterativeSolvers - NumericalDiff Skyline MPRealSupport OpenGLSupport KroneckerProduct Splines LevenbergMarquardt - ) +set(Eigen_HEADERS + AdolcForward + AlignedVector3 + ArpackSupport + AutoDiff + BVH + FFT + IterativeSolvers + KroneckerProduct + LevenbergMarquardt + MatrixFunctions + MoreVectorization + MPRealSupport + NonLinearOptimization + NumericalDiff + OpenGLSupport + Polynomials + Skyline + SparseExtra + Splines + ) install(FILES ${Eigen_HEADERS} From ac6b2266b939200dfdcbe415b6ca592293f6d401 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 25 Oct 2015 22:00:38 +0100 Subject: [PATCH 111/344] Fix SparseMatrix::insert/coeffRef for non-empty compressed matrix --- Eigen/src/SparseCore/SparseMatrix.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index f4d0a28dc..b27061f9e 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -1132,6 +1132,14 @@ typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Op 
for(Index j=1; j<=m_outerSize; ++j) m_outerIndex[j] = end; } + else + { + // turn the matrix into non-compressed mode + m_innerNonZeros = static_cast(std::malloc(m_outerSize * sizeof(StorageIndex))); + if(!m_innerNonZeros) internal::throw_std_bad_alloc(); + for(Index j=0; j Date: Sun, 25 Oct 2015 22:01:58 +0100 Subject: [PATCH 112/344] bug #1088: fix setIdentity for non-compressed sparse-matrix --- Eigen/src/SparseCore/SparseMatrix.h | 5 ++++- test/sparse_basic.cpp | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index b27061f9e..272f1d7b4 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -729,7 +729,8 @@ class SparseMatrix m_data.swap(other.m_data); } - /** Sets *this to the identity matrix */ + /** Sets *this to the identity matrix. + * This function also turns the matrix into compressed mode, and drop any reserved memory. */ inline void setIdentity() { eigen_assert(rows() == cols() && "ONLY FOR SQUARED MATRICES"); @@ -737,6 +738,8 @@ class SparseMatrix Eigen::Map(&this->m_data.index(0), rows()).setLinSpaced(0, StorageIndex(rows()-1)); Eigen::Map(&this->m_data.value(0), rows()).setOnes(); Eigen::Map(this->m_outerIndex, rows()+1).setLinSpaced(0, StorageIndex(rows())); + std::free(m_innerNonZeros); + m_innerNonZeros = 0; } inline SparseMatrix& operator=(const SparseMatrix& other) { diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 993f7840c..e8ebd7000 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -434,6 +434,20 @@ template void sparse_basic(const SparseMatrixType& re SparseMatrixType m1(rows, rows); m1.setIdentity(); VERIFY_IS_APPROX(m1, refMat1); + for(int k=0; k(0,rows-1); + Index j = internal::random(0,rows-1); + Index v = internal::random(); + m1.coeffRef(i,j) = v; + refMat1.coeffRef(i,j) = v; + VERIFY_IS_APPROX(m1, refMat1); + if(internal::random(0,10)<2) + m1.makeCompressed();
+ } + m1.setIdentity(); + refMat1.setIdentity(); + VERIFY_IS_APPROX(m1, refMat1); } } From 47d44c2f37b15d43bc63cf257959a1005a929fbf Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 26 Oct 2015 11:46:05 +0100 Subject: [PATCH 113/344] Add missing licence header to some top header files --- Eigen/Cholesky | 7 +++++++ Eigen/CholmodSupport | 7 +++++++ Eigen/Eigenvalues | 7 +++++++ Eigen/Geometry | 7 +++++++ Eigen/Householder | 7 +++++++ Eigen/IterativeLinearSolvers | 7 +++++++ Eigen/Jacobi | 7 +++++++ Eigen/LU | 7 +++++++ Eigen/MetisSupport | 7 +++++++ Eigen/OrderingMethods | 7 +++++++ Eigen/PaStiXSupport | 7 +++++++ Eigen/PardisoSupport | 7 +++++++ Eigen/QR | 7 +++++++ Eigen/QtAlignedMalloc | 6 ++++++ Eigen/SPQRSupport | 7 +++++++ Eigen/SVD | 7 +++++++ Eigen/Sparse | 7 +++++++ Eigen/SparseCore | 7 +++++++ Eigen/SparseQR | 7 +++++++ Eigen/SuperLUSupport | 7 +++++++ Eigen/UmfPackSupport | 7 +++++++ 21 files changed, 146 insertions(+) diff --git a/Eigen/Cholesky b/Eigen/Cholesky index dd0ca911c..705a04cc4 100644 --- a/Eigen/Cholesky +++ b/Eigen/Cholesky @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_CHOLESKY_MODULE_H #define EIGEN_CHOLESKY_MODULE_H diff --git a/Eigen/CholmodSupport b/Eigen/CholmodSupport index 687cd9777..83e2c1da4 100644 --- a/Eigen/CholmodSupport +++ b/Eigen/CholmodSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_CHOLMODSUPPORT_MODULE_H #define EIGEN_CHOLMODSUPPORT_MODULE_H diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues index 53c5a73a2..ea93eb303 100644 --- a/Eigen/Eigenvalues +++ b/Eigen/Eigenvalues @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_EIGENVALUES_MODULE_H #define EIGEN_EIGENVALUES_MODULE_H diff --git a/Eigen/Geometry b/Eigen/Geometry index 11aea8025..06b736e3f 100644 --- a/Eigen/Geometry +++ b/Eigen/Geometry @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_GEOMETRY_MODULE_H #define EIGEN_GEOMETRY_MODULE_H diff --git a/Eigen/Householder b/Eigen/Householder index 6e348db5c..89cd81b1a 100644 --- a/Eigen/Householder +++ b/Eigen/Householder @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_HOUSEHOLDER_MODULE_H #define EIGEN_HOUSEHOLDER_MODULE_H diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers index 027e537c6..957d5750b 100644 --- a/Eigen/IterativeLinearSolvers +++ b/Eigen/IterativeLinearSolvers @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 
2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H #define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H diff --git a/Eigen/Jacobi b/Eigen/Jacobi index ba8a4dc36..17c1d785a 100644 --- a/Eigen/Jacobi +++ b/Eigen/Jacobi @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_JACOBI_MODULE_H #define EIGEN_JACOBI_MODULE_H diff --git a/Eigen/LU b/Eigen/LU index 132ecc42c..2d70c92de 100644 --- a/Eigen/LU +++ b/Eigen/LU @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_LU_MODULE_H #define EIGEN_LU_MODULE_H diff --git a/Eigen/MetisSupport b/Eigen/MetisSupport index 6a113f7a8..85c41bf34 100644 --- a/Eigen/MetisSupport +++ b/Eigen/MetisSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_METISSUPPORT_MODULE_H #define EIGEN_METISSUPPORT_MODULE_H diff --git a/Eigen/OrderingMethods b/Eigen/OrderingMethods index 7c0f1ffff..d8ea36193 100644 --- a/Eigen/OrderingMethods +++ b/Eigen/OrderingMethods @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_ORDERINGMETHODS_MODULE_H #define EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/Eigen/PaStiXSupport b/Eigen/PaStiXSupport index e7d275f97..3411dface 100644 --- a/Eigen/PaStiXSupport +++ b/Eigen/PaStiXSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_PASTIXSUPPORT_MODULE_H #define EIGEN_PASTIXSUPPORT_MODULE_H diff --git a/Eigen/PardisoSupport b/Eigen/PardisoSupport index 99330ce7a..7dc9c7de0 100644 --- a/Eigen/PardisoSupport +++ b/Eigen/PardisoSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_PARDISOSUPPORT_MODULE_H #define EIGEN_PARDISOSUPPORT_MODULE_H diff --git a/Eigen/QR b/Eigen/QR index 230cb079a..f74f365f1 100644 --- a/Eigen/QR +++ b/Eigen/QR @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_QR_MODULE_H #define EIGEN_QR_MODULE_H diff --git a/Eigen/QtAlignedMalloc b/Eigen/QtAlignedMalloc index 46f7d83b7..4044d5ac5 100644 --- a/Eigen/QtAlignedMalloc +++ b/Eigen/QtAlignedMalloc @@ -1,3 +1,9 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_QTMALLOC_MODULE_H #define EIGEN_QTMALLOC_MODULE_H diff --git a/Eigen/SPQRSupport b/Eigen/SPQRSupport index e3f49bb5a..f9489dcd8 100644 --- a/Eigen/SPQRSupport +++ b/Eigen/SPQRSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPQRSUPPORT_MODULE_H #define EIGEN_SPQRSUPPORT_MODULE_H diff --git a/Eigen/SVD b/Eigen/SVD index dbd37b17a..b353f3f54 100644 --- a/Eigen/SVD +++ b/Eigen/SVD @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SVD_MODULE_H #define EIGEN_SVD_MODULE_H diff --git a/Eigen/Sparse b/Eigen/Sparse index a540f0eec..a2ef7a665 100644 --- a/Eigen/Sparse +++ b/Eigen/Sparse @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPARSE_MODULE_H #define EIGEN_SPARSE_MODULE_H diff --git a/Eigen/SparseCore b/Eigen/SparseCore index 48ed967b8..c2a25f9b5 100644 --- a/Eigen/SparseCore +++ b/Eigen/SparseCore @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPARSECORE_MODULE_H #define EIGEN_SPARSECORE_MODULE_H diff --git a/Eigen/SparseQR b/Eigen/SparseQR index efb2695ba..a6f3b7f7d 100644 --- a/Eigen/SparseQR +++ b/Eigen/SparseQR @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPARSEQR_MODULE_H #define EIGEN_SPARSEQR_MODULE_H diff --git a/Eigen/SuperLUSupport b/Eigen/SuperLUSupport index d1eac9464..0ae9f3fdf 100644 --- a/Eigen/SuperLUSupport +++ b/Eigen/SuperLUSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SUPERLUSUPPORT_MODULE_H #define EIGEN_SUPERLUSUPPORT_MODULE_H diff --git a/Eigen/UmfPackSupport b/Eigen/UmfPackSupport index 0efad5dee..4a9f46a1e 100644 --- a/Eigen/UmfPackSupport +++ b/Eigen/UmfPackSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_UMFPACKSUPPORT_MODULE_H #define EIGEN_UMFPACKSUPPORT_MODULE_H From 4704bdc9c06661f0329ea7d77239a72006177226 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 26 Oct 2015 15:17:52 +0100 Subject: [PATCH 114/344] Make the IterativeLinearSolvers module compatible with MPL2-only mode by defaulting to COLAMDOrdering and NaturalOrdering for ILUT and ILLT respectively. --- .../IncompleteCholesky.h | 11 ++++++++-- .../IterativeLinearSolvers/IncompleteLUT.h | 21 +++++++++++++------ test/CMakeLists.txt | 1 + test/mpl2only.cpp | 20 ++++++++++++++++++ 4 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 test/mpl2only.cpp diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h index 1e2e9f9b9..8f549af82 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h @@ -24,7 +24,8 @@ namespace Eigen { * \tparam _MatrixType The type of the sparse matrix. It is advised to give a row-oriented sparse matrix * \tparam _UpLo The triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. - * \tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering<> + * \tparam _OrderingType The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. Default is AMDOrdering, + * unless EIGEN_MPL2_ONLY is defined, in which case the default is NaturalOrdering. * * \implsparsesolverconcept * @@ -38,7 +39,13 @@ namespace Eigen { * \f$ \sigma \f$ is the initial shift value as returned and set by setInitialShift() method. The default value is \f$ \sigma = 10^{-3} \f$. 
* */ -template > +template +#else +NaturalOrdering +#endif +> class IncompleteCholesky : public SparseSolverBase > { protected: diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index 10b9fcc18..519472377 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h @@ -168,7 +168,7 @@ class IncompleteLUT : public SparseSolverBase void _solve_impl(const Rhs& b, Dest& x) const { - x = m_Pinv * b; + x = m_Pinv * b; x = m_lu.template triangularView().solve(x); x = m_lu.template triangularView().solve(x); x = m_P * x; @@ -221,16 +221,25 @@ template void IncompleteLUT::analyzePattern(const _MatrixType& amat) { // Compute the Fill-reducing permutation + // Since ILUT does not perform any numerical pivoting, + // it is highly preferable to keep the diagonal through symmetric permutations. +#ifndef EIGEN_MPL2_ONLY + // To this end, let's symmetrize the pattern and perform AMD on it. SparseMatrix mat1 = amat; SparseMatrix mat2 = amat.transpose(); - // Symmetrize the pattern // FIXME for a matrix with nearly symmetric pattern, mat2+mat1 is the appropriate choice. // on the other hand for a really non-symmetric pattern, mat2*mat1 should be prefered... SparseMatrix AtA = mat2 + mat1; - AtA.prune(keep_diag()); - internal::minimum_degree_ordering(AtA, m_P); // Then compute the AMD ordering... - - m_Pinv = m_P.inverse(); // ... and the inverse permutation + AMDOrdering ordering; + ordering(AtA,m_P); + m_Pinv = m_P.inverse(); // cache the inverse permutation +#else + // If AMD is not available, (MPL2-only), then let's use the slower COLAMD routine. 
+ SparseMatrix mat1 = amat; + COLAMDOrdering ordering; + ordering(mat1,m_Pinv); + m_P = m_Pinv.inverse(); +#endif m_analysisIsOk = true; m_factorizationIsOk = false; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9684c90e8..c8a8ba6f4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -255,6 +255,7 @@ ei_add_test(special_numbers) ei_add_test(rvalue_types) ei_add_test(dense_storage) ei_add_test(ctorleak) +ei_add_test(mpl2only) # # ei_add_test(denseLM) diff --git a/test/mpl2only.cpp b/test/mpl2only.cpp new file mode 100644 index 000000000..5ef0d2b2e --- /dev/null +++ b/test/mpl2only.cpp @@ -0,0 +1,20 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_MPL2_ONLY +#include +#include +#include +#include +#include + +int main() +{ + return 0; +} From f93654ae16b261e462ee00c5255072f8dd7d387b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 26 Oct 2015 16:00:25 +0100 Subject: [PATCH 115/344] bug #1098: fix regression introduced when generalizing some compute() methods in changeset 7031a851d45a8526474ac1ac972ad12a48e99f1a . 
--- Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h | 2 +- test/eigensolver_complex.cpp | 6 ++++++ test/eigensolver_generalized_real.cpp | 8 ++++++++ test/eigensolver_generic.cpp | 6 ++++++ test/eigensolver_selfadjoint.cpp | 6 ++++++ 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index 4d62708ad..c64555096 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -411,7 +411,7 @@ SelfAdjointEigenSolver& SelfAdjointEigenSolver if(n==1) { - m_eivalues.coeffRef(0,0) = numext::real(matrix.coeff(0,0)); + m_eivalues.coeffRef(0,0) = numext::real(matrix(0,0)); if(computeEigenvectors) m_eivec.setOnes(n,n); m_info = Success; diff --git a/test/eigensolver_complex.cpp b/test/eigensolver_complex.cpp index 0d4e2dc87..ad982ed40 100644 --- a/test/eigensolver_complex.cpp +++ b/test/eigensolver_complex.cpp @@ -125,6 +125,12 @@ template void eigensolver(const MatrixType& m) ComplexEigenSolver eiNaN(a); VERIFY_IS_EQUAL(eiNaN.info(), NoConvergence); } + + // regression test for bug 1098 + { + ComplexEigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } } template void eigensolver_verify_assert(const MatrixType& m) diff --git a/test/eigensolver_generalized_real.cpp b/test/eigensolver_generalized_real.cpp index 566a4bdc6..a46a2e50e 100644 --- a/test/eigensolver_generalized_real.cpp +++ b/test/eigensolver_generalized_real.cpp @@ -39,6 +39,14 @@ template void generalized_eigensolver_real(const MatrixType VectorType realEigenvalues = eig.eigenvalues().real(); std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + + // regression test for bug 1098 + { + GeneralizedSelfAdjointEigenSolver eig1(a.adjoint() * a,b.adjoint() * b); + eig1.compute(a.adjoint() * a,b.adjoint() * b); + GeneralizedEigenSolver eig2(a.adjoint() * 
a,b.adjoint() * b); + eig2.compute(a.adjoint() * a,b.adjoint() * b); + } } void test_eigensolver_generalized_real() diff --git a/test/eigensolver_generic.cpp b/test/eigensolver_generic.cpp index c5441ac4e..c42fcaeba 100644 --- a/test/eigensolver_generic.cpp +++ b/test/eigensolver_generic.cpp @@ -70,6 +70,12 @@ template void eigensolver(const MatrixType& m) EigenSolver eiNaN(a); VERIFY_IS_EQUAL(eiNaN.info(), NoConvergence); } + + // regression test for bug 1098 + { + EigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } } template void eigensolver_verify_assert(const MatrixType& m) diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp index 4c3de7a80..a7b4deb5d 100644 --- a/test/eigensolver_selfadjoint.cpp +++ b/test/eigensolver_selfadjoint.cpp @@ -156,6 +156,12 @@ template void selfadjointeigensolver(const MatrixType& m) SelfAdjointEigenSolver eiSymmNaN(symmC); VERIFY_IS_EQUAL(eiSymmNaN.info(), NoConvergence); } + + // regression test for bug 1098 + { + SelfAdjointEigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } } void bug_854() From a5324a131f3816c8312e27a9dc928b8d56d8cf3b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 26 Oct 2015 16:16:24 +0100 Subject: [PATCH 116/344] bug #1092: fix iterative solver ctors for expressions as input --- Eigen/src/IterativeLinearSolvers/BiCGSTAB.h | 3 +- .../ConjugateGradient.h | 3 +- .../LeastSquareConjugateGradient.h | 3 +- test/sparse_solver.h | 65 ++++++++++++------- .../Eigen/src/IterativeSolvers/DGMRES.h | 4 +- .../Eigen/src/IterativeSolvers/GMRES.h | 3 +- .../Eigen/src/IterativeSolvers/MINRES.h | 3 +- 7 files changed, 52 insertions(+), 32 deletions(-) diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index 76e86a94a..4be00da47 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -182,7 +182,8 @@ public: * this class becomes invalid. 
Call compute() to update it with the new * matrix A, or modify a copy of A. */ - explicit BiCGSTAB(const MatrixType& A) : Base(A) {} + template + explicit BiCGSTAB(const EigenBase& A) : Base(A.derived()) {} ~BiCGSTAB() {} diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h index 59092dc18..dbedf28fd 100644 --- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h @@ -185,7 +185,8 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - explicit ConjugateGradient(const MatrixType& A) : Base(A) {} + template + explicit ConjugateGradient(const EigenBase& A) : Base(A.derived()) {} ~ConjugateGradient() {} diff --git a/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h index b578b2a7f..1593c57b5 100644 --- a/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h @@ -175,7 +175,8 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. 
*/ - explicit LeastSquaresConjugateGradient(const MatrixType& A) : Base(A) {} + template + explicit LeastSquaresConjugateGradient(const EigenBase& A) : Base(A.derived()) {} ~LeastSquaresConjugateGradient() {} diff --git a/test/sparse_solver.h b/test/sparse_solver.h index a0254ff1c..b67653496 100644 --- a/test/sparse_solver.h +++ b/test/sparse_solver.h @@ -63,32 +63,47 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A, VERIFY(xm.isApprox(refX,test_precision())); } - // test initialization ctor + // if not too large, do some extra check: + if(A.rows()<2000) { - Rhs x(b.rows(), b.cols()); - Solver solver2(A); - VERIFY(solver2.info() == Success); - x = solver2.solve(b); - VERIFY(x.isApprox(refX,test_precision())); - } - - // test dense Block as the result and rhs: - { - DenseRhs x(refX.rows(), refX.cols()); - DenseRhs oldb(db); - x.setZero(); - x.block(0,0,x.rows(),x.cols()) = solver.solve(db.block(0,0,db.rows(),db.cols())); - VERIFY(oldb.isApprox(db) && "sparse solver testing: the rhs should not be modified!"); - VERIFY(x.isApprox(refX,test_precision())); - } - - // test uncompressed inputs - { - Mat A2 = A; - A2.reserve((ArrayXf::Random(A.outerSize())+2).template cast().eval()); - solver.compute(A2); - Rhs x = solver.solve(b); - VERIFY(x.isApprox(refX,test_precision())); + // test initialization ctor + { + Rhs x(b.rows(), b.cols()); + Solver solver2(A); + VERIFY(solver2.info() == Success); + x = solver2.solve(b); + VERIFY(x.isApprox(refX,test_precision())); + } + + // test dense Block as the result and rhs: + { + DenseRhs x(refX.rows(), refX.cols()); + DenseRhs oldb(db); + x.setZero(); + x.block(0,0,x.rows(),x.cols()) = solver.solve(db.block(0,0,db.rows(),db.cols())); + VERIFY(oldb.isApprox(db) && "sparse solver testing: the rhs should not be modified!"); + VERIFY(x.isApprox(refX,test_precision())); + } + + // test uncompressed inputs + { + Mat A2 = A; + A2.reserve((ArrayXf::Random(A.outerSize())+2).template cast().eval()); + 
solver.compute(A2); + Rhs x = solver.solve(b); + VERIFY(x.isApprox(refX,test_precision())); + } + + // test expression as input + { + solver.compute(0.5*(A+A)); + Rhs x = solver.solve(b); + VERIFY(x.isApprox(refX,test_precision())); + + Solver solver2(0.5*(A+A)); + Rhs x2 = solver2.solve(b); + VERIFY(x2.isApprox(refX,test_precision())); + } } } diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h index 52eb65a2f..ab82e782d 100644 --- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h @@ -134,8 +134,8 @@ class DGMRES : public IterativeSolverBase > * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - DGMRES(const MatrixType& A) : Base(A),m_restart(30),m_neig(0),m_r(0),m_maxNeig(5),m_isDeflAllocated(false),m_isDeflInitialized(false) - {} + template + explicit DGMRES(const EigenBase& A) : Base(A.derived()), m_restart(30),m_neig(0),m_r(0),m_maxNeig(5),m_isDeflAllocated(false),m_isDeflInitialized(false) {} ~DGMRES() {} diff --git a/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/unsupported/Eigen/src/IterativeSolvers/GMRES.h index 05e5862a5..2cfa60140 100644 --- a/unsupported/Eigen/src/IterativeSolvers/GMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/GMRES.h @@ -288,7 +288,8 @@ public: * this class becomes invalid. Call compute() to update it with the new * matrix A, or modify a copy of A. */ - GMRES(const MatrixType& A) : Base(A), m_restart(30) {} + template + explicit GMRES(const EigenBase& A) : Base(A.derived()), m_restart(30) {} ~GMRES() {} diff --git a/unsupported/Eigen/src/IterativeSolvers/MINRES.h b/unsupported/Eigen/src/IterativeSolvers/MINRES.h index c393112a4..84e491fa1 100644 --- a/unsupported/Eigen/src/IterativeSolvers/MINRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/MINRES.h @@ -227,7 +227,8 @@ namespace Eigen { * this class becomes invalid. 
Call compute() to update it with the new * matrix A, or modify a copy of A. */ - MINRES(const MatrixType& A) : Base(A) {} + template + explicit MINRES(const EigenBase& A) : Base(A.derived()) {} /** Destructor. */ ~MINRES(){} From e6f8c5c325fca53b53436b6bd8d66749444216bb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 26 Oct 2015 18:20:00 +0100 Subject: [PATCH 117/344] Add support to directly evaluate the product of two sparse matrices within a dense matrix. --- .../ConservativeSparseSparseProduct.h | 85 ++++++++++++++++++- Eigen/src/SparseCore/SparseAssign.h | 8 +- Eigen/src/SparseCore/SparseMatrixBase.h | 2 +- Eigen/src/SparseCore/SparseProduct.h | 34 +++++++- test/sparse_product.cpp | 11 +++ 5 files changed, 132 insertions(+), 8 deletions(-) diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index a61ceb7cc..0f6835846 100644 --- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -255,6 +255,89 @@ struct conservative_sparse_sparse_product_selector +static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) +{ + typedef typename remove_all::type::Scalar Scalar; + Index cols = rhs.outerSize(); + eigen_assert(lhs.outerSize() == rhs.innerSize()); + + evaluator lhsEval(lhs); + evaluator rhsEval(rhs); + + for (Index j=0; j::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) + { + Scalar y = rhsIt.value(); + Index k = rhsIt.index(); + for (typename evaluator::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) + { + Index i = lhsIt.index(); + Scalar x = lhsIt.value(); + res.coeffRef(i,j) += x * y; + } + } + } +} + + +} // end namespace internal + +namespace internal { + +template::Flags&RowMajorBit) ? RowMajor : ColMajor, + int RhsStorageOrder = (traits::Flags&RowMajorBit) ? RowMajor : ColMajor> +struct sparse_sparse_to_dense_product_selector; + +template +struct sparse_sparse_to_dense_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + internal::sparse_sparse_to_dense_product_impl(lhs, rhs, res); + } +}; + +template +struct sparse_sparse_to_dense_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix ColMajorMatrix; + ColMajorMatrix lhsCol(lhs); + internal::sparse_sparse_to_dense_product_impl(lhsCol, rhs, res); + } +}; + +template +struct sparse_sparse_to_dense_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix ColMajorMatrix; + ColMajorMatrix rhsCol(rhs); + internal::sparse_sparse_to_dense_product_impl(lhs, rhsCol, res); + } +}; + +template +struct sparse_sparse_to_dense_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + Transpose trRes(res); + internal::sparse_sparse_to_dense_product_impl >(rhs, lhs, trRes); + } +}; + + } // end namespace internal } // end namespace 
Eigen diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index e984bbdb3..4b663a59e 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -133,8 +133,8 @@ struct Assignment }; // Sparse to Dense assignment -template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> -struct Assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment { static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { @@ -149,8 +149,8 @@ struct Assignment } }; -template< typename DstXprType, typename SrcXprType, typename Scalar> -struct Assignment, Sparse2Dense, Scalar> +template< typename DstXprType, typename SrcXprType> +struct Assignment, Sparse2Dense> { static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 4e720904e..38eb1c37a 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -281,7 +281,7 @@ template class SparseMatrixBase : public EigenBase // sparse * sparse template - const Product + const Product operator*(const SparseMatrixBase &other) const; // sparse * dense diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index da8919ecc..26680b7a7 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -25,10 +25,10 @@ namespace Eigen { * */ template template -inline const Product +inline const Product SparseMatrixBase::operator*(const SparseMatrixBase &other) const { - return Product(derived(), other.derived()); + return Product(derived(), other.derived()); } namespace internal { @@ -61,6 +61,36 @@ struct generic_product_impl {}; +// Dense = sparse * sparse +template< typename DstXprType, typename Lhs, typename Rhs, int Options/*, typename Scalar*/> +struct Assignment, 
internal::assign_op, Sparse2Dense/*, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type*/> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + dst.setZero(); + dst += src; + } +}; + +// Dense += sparse * sparse +template< typename DstXprType, typename Lhs, typename Rhs, int Options> +struct Assignment, internal::add_assign_op, Sparse2Dense/*, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type*/> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + { + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; + LhsNested lhsNested(src.lhs()); + RhsNested rhsNested(src.rhs()); + internal::sparse_sparse_to_dense_product_selector::type, + typename remove_all::type, DstXprType>::run(lhsNested,rhsNested,dst); + } +}; + template struct evaluator > > : public evaluator::PlainObject> diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp index f1e5b8e4c..8c83f08d7 100644 --- a/test/sparse_product.cpp +++ b/test/sparse_product.cpp @@ -76,6 +76,17 @@ template void sparse_product() VERIFY_IS_APPROX(m4=(m2t.transpose()*m3t.transpose()).pruned(0), refMat4=refMat2t.transpose()*refMat3t.transpose()); VERIFY_IS_APPROX(m4=(m2*m3t.transpose()).pruned(0), refMat4=refMat2*refMat3t.transpose()); + // dense ?= sparse * sparse + VERIFY_IS_APPROX(dm4 =m2*m3, refMat4 =refMat2*refMat3); + VERIFY_IS_APPROX(dm4+=m2*m3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3, refMat4 =refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3, refMat4+=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3t.transpose(), refMat4 =refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3t.transpose(), refMat4+=refMat2t.transpose()*refMat3t.transpose()); + 
VERIFY_IS_APPROX(dm4 =m2*m3t.transpose(), refMat4 =refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4+=m2*m3t.transpose(), refMat4+=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1); + // test aliasing m4 = m2; refMat4 = refMat2; VERIFY_IS_APPROX(m4=m4*m3, refMat4=refMat4*refMat3); From f8e7b9590d2ab7030180bc9fdb93e45f8aef3469 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Oct 2015 10:47:37 -0700 Subject: [PATCH 118/344] Fixed compilation error triggered by gcc 4.7 --- unsupported/test/cxx11_tensor_fft.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp index 4aefcc79c..0f6e09106 100644 --- a/unsupported/test/cxx11_tensor_fft.cpp +++ b/unsupported/test/cxx11_tensor_fft.cpp @@ -67,14 +67,14 @@ static void test_fft_complex_input_golden() { array fft; fft[0] = 0; - Tensor, 1, ColMajor, long> forward_output_both_parts = input.template fft(fft); - Tensor, 1, ColMajor, long> reverse_output_both_parts = input.template fft(fft); + Tensor, 1, ColMajor, long> forward_output_both_parts = input.fft(fft); + Tensor, 1, ColMajor, long> reverse_output_both_parts = input.fft(fft); - Tensor forward_output_real_part = input.template fft(fft); - Tensor reverse_output_real_part = input.template fft(fft); + Tensor forward_output_real_part = input.fft(fft); + Tensor reverse_output_real_part = input.fft(fft); - Tensor forward_output_imag_part = input.template fft(fft); - Tensor reverse_output_imag_part = input.template fft(fft); + Tensor forward_output_imag_part = input.fft(fft); + Tensor reverse_output_imag_part = input.fft(fft); VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0)); VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0)); @@ -124,14 +124,14 @@ static void test_fft_real_input_golden() { array fft; fft[0] = 0; - Tensor, 1, ColMajor, long> 
forward_output_both_parts = input.template fft(fft); - Tensor, 1, ColMajor, long> reverse_output_both_parts = input.template fft(fft); + Tensor, 1, ColMajor, long> forward_output_both_parts = input.fft(fft); + Tensor, 1, ColMajor, long> reverse_output_both_parts = input.fft(fft); - Tensor forward_output_real_part = input.template fft(fft); - Tensor reverse_output_real_part = input.template fft(fft); + Tensor forward_output_real_part = input.fft(fft); + Tensor reverse_output_real_part = input.fft(fft); - Tensor forward_output_imag_part = input.template fft(fft); - Tensor reverse_output_imag_part = input.template fft(fft); + Tensor forward_output_imag_part = input.fft(fft); + Tensor reverse_output_imag_part = input.fft(fft); VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0)); VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0)); From a3e144727cf0b1cc63dc1cf9a356ecb3ddd9f95f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Oct 2015 10:48:11 -0700 Subject: [PATCH 119/344] Fixed compilation warning --- unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index dbc8c2ca9..62f5ff923 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -204,7 +204,7 @@ struct TensorEvaluator, D buf[i] = MakeComplex::value>()(m_impl.coeff(i)); } - for (int i = 0; i < m_fft.size(); ++i) { + for (size_t i = 0; i < m_fft.size(); ++i) { int dim = m_fft[i]; eigen_assert(dim >= 0 && dim < NumDims); Index line_len = m_dimensions[dim]; From 9f721384e080067e05ee1fd165e33aa9c77f25b7 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Oct 2015 11:21:27 -0700 Subject: [PATCH 120/344] Added support for empty dimensions --- .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 35 +++++++++---------- 1 file changed, 17 
insertions(+), 18 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index d6ec62a74..89e28bdb5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -52,8 +52,8 @@ struct fixed_size_tensor_index_linearization_helper static inline Index run(array const& indices, const Dimensions& dimensions) { - return array_get(indices) + - dget::value * + return array_get(indices) + + dget::value * fixed_size_tensor_index_linearization_helper::run(indices, dimensions); } }; @@ -62,10 +62,9 @@ template struct fixed_size_tensor_index_linearization_helper { template EIGEN_DEVICE_FUNC - static inline Index run(array const& indices, - const Dimensions&) + static inline Index run(array const&, const Dimensions&) { - return array_get(indices); + return 0; } }; @@ -135,11 +134,11 @@ struct Sizes : internal::numeric_list { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); } }; @@ -222,11 +221,11 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array& indices) const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array& indices) 
const { - return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); } }; @@ -402,22 +401,22 @@ template -struct sizes_match_up_to_dim { - static inline bool run(Dims1&, Dims2&) { +struct sizes_match_below_dim { + static inline bool run(Dims1& dims1, Dims2& dims2) { return false; } }; template -struct sizes_match_up_to_dim { +struct sizes_match_below_dim { static inline bool run(Dims1& dims1, Dims2& dims2) { - return (array_get(dims1) == array_get(dims2)) & - sizes_match_up_to_dim::run(dims1, dims2); + return (array_get(dims1) == array_get(dims2)) & + sizes_match_below_dim::run(dims1, dims2); } }; template -struct sizes_match_up_to_dim { - static inline bool run(Dims1& dims1, Dims2& dims2) { - return (array_get<0>(dims1) == array_get<0>(dims2)); +struct sizes_match_below_dim { + static inline bool run(Dims1&, Dims2&) { + return true; } }; @@ -426,7 +425,7 @@ struct sizes_match_up_to_dim { template bool dimensions_match(Dims1& dims1, Dims2& dims2) { - return internal::sizes_match_up_to_dim::value-1, internal::array_size::value-1>::run(dims1, dims2); + return internal::sizes_match_below_dim::value, internal::array_size::value>::run(dims1, dims2); } } // end namespace Eigen From 9dc236bc837501cd098154b41f81cae99af73858 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Oct 2015 12:41:48 -0700 Subject: [PATCH 121/344] Fixed compilation warning --- unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h index 96a7d5c20..7cd04a99e 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h @@ -354,7 +354,7 @@ struct h_array_reduce template struct h_array_reduce { - constexpr static inline T run(const 
std::array& arr, T identity) + constexpr static inline T run(const std::array&, T identity) { return identity; } From 1f4c98abb1634bdbdd6583b55ba36dcc09ef5773 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Oct 2015 12:42:55 -0700 Subject: [PATCH 122/344] Fixed compilation warning --- unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 89e28bdb5..145ca0d64 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -402,7 +402,7 @@ template struct sizes_match_below_dim { - static inline bool run(Dims1& dims1, Dims2& dims2) { + static inline bool run(Dims1&, Dims2&) { return false; } }; From 1c8312c811344beaa06f7ae9258f66c38337c607 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Oct 2015 14:29:26 -0700 Subject: [PATCH 123/344] Started to add support for tensors of rank 0 --- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 31 +++++++++++++++++++ unsupported/Eigen/CXX11/src/Tensor/TensorIO.h | 5 ++- .../CXX11/src/Tensor/TensorInitializer.h | 12 +++++++ .../Eigen/CXX11/src/Tensor/TensorStorage.h | 12 +++++-- 4 files changed, 57 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 3ac465d24..0df1345c2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -140,6 +140,12 @@ class Tensor : public TensorBase= 0 && index < size()); @@ -174,6 +180,12 @@ class Tensor : public TensorBase= 0 && index < size()); @@ -234,6 +246,12 @@ class Tensor : public TensorBase::value == 1) { + static const int rank = internal::array_size::value; + if (rank == 0) { + os << tensor.coeff(0); + } else if (rank == 1) { Map > array(const_cast(tensor.data()), total_size); os << array; } 
else { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h b/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h index 4303e3536..ad2a1e6ac 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h @@ -55,6 +55,18 @@ struct Initializer { } }; +template +struct Initializer { + typedef typename traits::Scalar InitList; + + static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>*/* indices*/, + const InitList& v) { + tensor.coeffRef(0) = v; + } +}; + + template void initialize_tensor(TensorEvaluator& tensor, const typename Initializer::NumDimensions>::InitList& vals) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h index 9e4cf039d..ee6f14b8f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h @@ -71,7 +71,11 @@ class TensorStorage, Options_> typedef DSizes Dimensions; typedef TensorStorage, Options_> Self; - EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() {} + EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() { + if (NumIndices_ == 0) { + m_data = internal::conditional_aligned_new_auto(1); + } + } EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_dimensions(internal::template repeat(0)) {} EIGEN_DEVICE_FUNC TensorStorage(Index size, const array& dimensions) @@ -101,13 +105,17 @@ class TensorStorage, Options_> EIGEN_DEVICE_FUNC void resize(Index size, const array& nbDimensions) { + eigen_assert(size >= 1); const Index currentSz = internal::array_prod(m_dimensions); if(size != currentSz) { internal::conditional_aligned_delete_auto(m_data, currentSz); if (size) m_data = internal::conditional_aligned_new_auto(size); - else + else if (NumIndices_ == 0) { + m_data = internal::conditional_aligned_new_auto(1); + } + else m_data = 0; 
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } From 0fc8954282140d00b47ee1d298c4d4bce35aa724 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 10:38:49 +0100 Subject: [PATCH 124/344] Improve readibility of EIGEN_DEBUG_ASSIGN mode. --- Eigen/src/Core/AssignEvaluator.h | 8 ++++---- Eigen/src/Core/util/XprHelper.h | 32 ++++++++++++++++++++++++++++++++ test/vectorization_logic.cpp | 32 +++----------------------------- 3 files changed, 39 insertions(+), 33 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 121a722f2..3667f60f2 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -125,8 +125,8 @@ public: std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; std::cerr.setf(std::ios::hex, std::ios::basefield); - EIGEN_DEBUG_VAR(DstFlags) - EIGEN_DEBUG_VAR(SrcFlags) + std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; + std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; std::cerr.unsetf(std::ios::hex); EIGEN_DEBUG_VAR(DstAlignment) EIGEN_DEBUG_VAR(SrcAlignment) @@ -141,11 +141,11 @@ public: EIGEN_DEBUG_VAR(MayInnerVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MaySliceVectorize) - EIGEN_DEBUG_VAR(Traversal) + std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; EIGEN_DEBUG_VAR(UnrollingLimit) EIGEN_DEBUG_VAR(MayUnrollCompletely) EIGEN_DEBUG_VAR(MayUnrollInner) - EIGEN_DEBUG_VAR(Unrolling) + std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; std::cerr << std::endl; } #endif diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 624d8a83b..cd93b2320 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ 
b/Eigen/src/Core/util/XprHelper.h @@ -670,6 +670,38 @@ template struct is_same_or_void { enum { value = 1 }; }; template struct is_same_or_void { enum { value = 1 }; }; template<> struct is_same_or_void { enum { value = 1 }; }; +#ifdef EIGEN_DEBUG_ASSIGN +std::string demangle_traversal(int t) +{ + if(t==DefaultTraversal) return "DefaultTraversal"; + if(t==LinearTraversal) return "LinearTraversal"; + if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal"; + if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal"; + if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal"; + return "?"; +} +std::string demangle_unrolling(int t) +{ + if(t==NoUnrolling) return "NoUnrolling"; + if(t==InnerUnrolling) return "InnerUnrolling"; + if(t==CompleteUnrolling) return "CompleteUnrolling"; + return "?"; +} +std::string demangle_flags(int f) +{ + std::string res; + if(f&RowMajorBit) res += " | RowMajor"; + if(f&PacketAccessBit) res += " | Packet"; + if(f&LinearAccessBit) res += " | Linear"; + if(f&LvalueBit) res += " | Lvalue"; + if(f&DirectAccessBit) res += " | Direct"; + if(f&NestByRefBit) res += " | NestByRef"; + if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit"; + + return res; +} +#endif + } // end namespace internal // we require Lhs and Rhs to have the same scalar type. 
Currently there is no example of a binary functor diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 6ff38ed11..da60a2f3a 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -11,35 +11,9 @@ #include "main.h" #include -std::string demangle_traversal(int t) -{ - if(t==DefaultTraversal) return "DefaultTraversal"; - if(t==LinearTraversal) return "LinearTraversal"; - if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal"; - if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal"; - if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal"; - return "?"; -} -std::string demangle_unrolling(int t) -{ - if(t==NoUnrolling) return "NoUnrolling"; - if(t==InnerUnrolling) return "InnerUnrolling"; - if(t==CompleteUnrolling) return "CompleteUnrolling"; - return "?"; -} -std::string demangle_flags(int f) -{ - std::string res; - if(f&RowMajorBit) res += " | RowMajor"; - if(f&PacketAccessBit) res += " | Packet"; - if(f&LinearAccessBit) res += " | Linear"; - if(f&LvalueBit) res += " | Lvalue"; - if(f&DirectAccessBit) res += " | Direct"; - if(f&NestByRefBit) res += " | NestByRef"; - if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit"; - - return res; -} +using internal::demangle_flags; +using internal::demangle_traversal; +using internal::demangle_unrolling; template bool test_assign(const Dst&, const Src&, int traversal, int unrolling) From 73f692d16b544d619c2234259f4086cc93c577df Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 11:01:37 +0100 Subject: [PATCH 125/344] Fix ambiguous instantiation --- Eigen/src/SparseCore/SparseProduct.h | 75 +++++++++++++++++++++------- test/sparse_basic.cpp | 2 +- test/sparse_product.cpp | 4 ++ 3 files changed, 62 insertions(+), 19 deletions(-) diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index 26680b7a7..ea2c3a8a3 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ 
b/Eigen/src/SparseCore/SparseProduct.h @@ -39,6 +39,34 @@ struct generic_product_impl { template static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + evalTo(dst, lhs, rhs, typename evaluator_traits::Shape()); + } + + // dense += sparse * sparse + template + static void addTo(Dest& dst, const ActualLhs& lhs, const Rhs& rhs, int* = typename enable_if::Shape,DenseShape>::value,int*>::type(0) ) + { + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; + LhsNested lhsNested(lhs); + RhsNested rhsNested(rhs); + internal::sparse_sparse_to_dense_product_selector::type, + typename remove_all::type, Dest>::run(lhsNested,rhsNested,dst); + } + + // dense -= sparse * sparse + template + static void subTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, int* = typename enable_if::Shape,DenseShape>::value,int*>::type(0) ) + { + addTo(dst, -lhs, rhs); + } + +protected: + + // sparse = sparse * sparse + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, SparseShape) { typedef typename nested_eval::type LhsNested; typedef typename nested_eval::type RhsNested; @@ -47,6 +75,14 @@ struct generic_product_impl internal::conservative_sparse_sparse_product_selector::type, typename remove_all::type, Dest>::run(lhsNested,rhsNested,dst); } + + // dense = sparse * sparse + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, DenseShape) + { + dst.setZero(); + addTo(dst, lhs, rhs); + } }; // sparse * sparse-triangular @@ -61,33 +97,36 @@ struct generic_product_impl {}; -// Dense = sparse * sparse -template< typename DstXprType, typename Lhs, typename Rhs, int Options/*, typename Scalar*/> -struct Assignment, internal::assign_op, Sparse2Dense/*, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type*/> +// dense = sparse-product (can be sparse*sparse, sparse*perm, etc.) 
+template< typename DstXprType, typename Lhs, typename Rhs> +struct Assignment, internal::assign_op, Sparse2Dense> { - typedef Product SrcXprType; + typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { - dst.setZero(); - dst += src; + generic_product_impl::evalTo(dst,src.lhs(),src.rhs()); } }; -// Dense += sparse * sparse -template< typename DstXprType, typename Lhs, typename Rhs, int Options> -struct Assignment, internal::add_assign_op, Sparse2Dense/*, - typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type*/> +// dense += sparse-product (can be sparse*sparse, sparse*perm, etc.) +template< typename DstXprType, typename Lhs, typename Rhs> +struct Assignment, internal::add_assign_op, Sparse2Dense> { - typedef Product SrcXprType; + typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { - typedef typename nested_eval::type LhsNested; - typedef typename nested_eval::type RhsNested; - LhsNested lhsNested(src.lhs()); - RhsNested rhsNested(src.rhs()); - internal::sparse_sparse_to_dense_product_selector::type, - typename remove_all::type, DstXprType>::run(lhsNested,rhsNested,dst); + generic_product_impl::addTo(dst,src.lhs(),src.rhs()); + } +}; + +// dense -= sparse-product (can be sparse*sparse, sparse*perm, etc.) 
+template< typename DstXprType, typename Lhs, typename Rhs> +struct Assignment, internal::sub_assign_op, Sparse2Dense> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + { + generic_product_impl::subTo(dst,src.lhs(),src.rhs()); } }; diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index e8ebd7000..d8e42e984 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -438,7 +438,7 @@ template void sparse_basic(const SparseMatrixType& re { Index i = internal::random(0,rows-1); Index j = internal::random(0,rows-1); - Index v = internal::random(); + Scalar v = internal::random(); m1.coeffRef(i,j) = v; refMat1.coeffRef(i,j) = v; VERIFY_IS_APPROX(m1, refMat1); diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp index 8c83f08d7..7ec5270e8 100644 --- a/test/sparse_product.cpp +++ b/test/sparse_product.cpp @@ -79,12 +79,16 @@ template void sparse_product() // dense ?= sparse * sparse VERIFY_IS_APPROX(dm4 =m2*m3, refMat4 =refMat2*refMat3); VERIFY_IS_APPROX(dm4+=m2*m3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4-=m2*m3, refMat4-=refMat2*refMat3); VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3, refMat4 =refMat2t.transpose()*refMat3); VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3, refMat4+=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4-=m2t.transpose()*m3, refMat4-=refMat2t.transpose()*refMat3); VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3t.transpose(), refMat4 =refMat2t.transpose()*refMat3t.transpose()); VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3t.transpose(), refMat4+=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4-=m2t.transpose()*m3t.transpose(), refMat4-=refMat2t.transpose()*refMat3t.transpose()); VERIFY_IS_APPROX(dm4 =m2*m3t.transpose(), refMat4 =refMat2*refMat3t.transpose()); VERIFY_IS_APPROX(dm4+=m2*m3t.transpose(), refMat4+=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4-=m2*m3t.transpose(), refMat4-=refMat2*refMat3t.transpose()); 
VERIFY_IS_APPROX(dm4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1); // test aliasing From 12f50a46979b85a7beb3bae00e223f4683c08c78 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 11:04:19 +0100 Subject: [PATCH 126/344] Fix assign vectorization logic with respect to fixed outer-stride --- Eigen/src/Core/AssignEvaluator.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 3667f60f2..ba7748395 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -54,6 +54,7 @@ private: InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) : int(Dst::MaxRowsAtCompileTime), + OuterStride = int(outer_stride_at_compile_time::ret), MaxSizeAtCompileTime = Dst::SizeAtCompileTime, PacketSize = unpacket_traits::size }; @@ -65,7 +66,9 @@ private: MightVectorize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) && (functor_traits::PacketAccess), - MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + MayInnerVectorize = MightVectorize + && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0 && int(JointAlignment)>=int(RequiredAlignment), MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess From 8c66b6bc6157da3aaa7a8529e1fcd6e257e7693f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 11:06:42 +0100 Subject: [PATCH 127/344] Simplify evaluator::Flags for Map<> --- Eigen/src/Core/CoreEvaluators.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index c0563f534..c898b2abc 100644 --- 
a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -633,20 +633,9 @@ struct evaluator > HasNoStride = HasNoInnerStride && HasNoOuterStride, IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - // FIXME I don't get the code below, in particular why outer-stride-at-compile-time should have any effect on PacketAccessBit... - // Let's remove the code below for 3.4 if no issue occur -// PacketAlignment = unpacket_traits::alignment, -// KeepsPacketAccess = bool(HasNoInnerStride) -// && ( bool(IsDynamicSize) -// || HasNoOuterStride -// || ( OuterStrideAtCompileTime!=Dynamic -// && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime) % PacketAlignment)==0 ) ), - KeepsPacketAccess = bool(HasNoInnerStride), - - Flags0 = evaluator::Flags, - Flags1 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) - ? int(Flags0) : int(Flags0 & ~LinearAccessBit), - Flags = KeepsPacketAccess ? int(Flags1) : (int(Flags1) & ~PacketAccessBit), + PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit), + LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit), + Flags = int( evaluator::Flags) & (LinearAccessMask&PacketAccessMask), Alignment = int(MapOptions)&int(AlignedMask) }; From 2475a1de48cb6f602ab7340d1e1afad15754a103 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 15:39:50 +0100 Subject: [PATCH 128/344] bug #1008: stabilize isfinite/isinf/isnan/hasNaN/allFinite functions for fast-math mode. 
--- Eigen/src/Core/BooleanRedux.h | 8 ++++ Eigen/src/Core/MathFunctions.h | 69 +++++++++++++++++++++++++++++----- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h index ba45cf5c3..be4cd9b3a 100644 --- a/Eigen/src/Core/BooleanRedux.h +++ b/Eigen/src/Core/BooleanRedux.h @@ -142,7 +142,11 @@ inline Eigen::Index DenseBase::count() const template inline bool DenseBase::hasNaN() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isNaN().any(); +#else return !((derived().array()==derived().array()).all()); +#endif } /** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. @@ -152,7 +156,11 @@ inline bool DenseBase::hasNaN() const template inline bool DenseBase::allFinite() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isFinite().all(); +#else return !((derived()-derived()).hasNaN()); +#endif } } // end namespace Eigen diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 19b7954a9..299a9d61d 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -818,11 +818,18 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); } +// std::is* do not work with fast-math and gcc +#if EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && defined __FAST_MATH__) +#define EIGEN_USE_STD_FPCLASSIFY 1 +#else +#define EIGEN_USE_STD_FPCLASSIFY 0 +#endif + template EIGEN_DEVICE_FUNC bool (isfinite)(const T& x) { - #if EIGEN_HAS_CXX11_MATH + #if EIGEN_USE_STD_FPCLASSIFY using std::isfinite; return isfinite EIGEN_NOT_A_MACRO (x); #else @@ -830,11 +837,23 @@ bool (isfinite)(const T& x) #endif } +template +EIGEN_DEVICE_FUNC +bool (isinf)(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isinf; + return isinf EIGEN_NOT_A_MACRO (x); + #else + return x>NumTraits::highest() || 
x::lowest(); + #endif +} + template EIGEN_DEVICE_FUNC bool (isnan)(const T& x) { - #if EIGEN_HAS_CXX11_MATH + #if EIGEN_USE_STD_FPCLASSIFY using std::isnan; return isnan EIGEN_NOT_A_MACRO (x); #else @@ -842,18 +861,48 @@ bool (isnan)(const T& x) #endif } +#if (!EIGEN_USE_STD_FPCLASSIFY) + +#if EIGEN_COMP_MSVC + +//MSVC defines a _isnan builtin function, but for double only +template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan(double(x)); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(double(x)); } + +#elif defined __FAST_MATH__ + +#if EIGEN_COMP_CLANG +#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optnone)) +#elif EIGEN_COMP_GNUC +#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-fast-math"))) +#else +#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC +#endif + template -EIGEN_DEVICE_FUNC -bool (isinf)(const T& x) +EIGEN_TMP_NOOPT_ATTRIB +bool isinf_helper(const T& x) { - #if EIGEN_HAS_CXX11_MATH - using std::isinf; - return isinf EIGEN_NOT_A_MACRO (x); - #else - return x>NumTraits::highest() || x::lowest(); - #endif + return x>NumTraits::highest() || x::lowest(); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return !(x <= std::numeric_limits::infinity()); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const double& x) { return x!=x; } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const float& x) { return x!=x; } + +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const double& x) { return isinf_helper(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const float& x) { return isinf_helper(x); } +#if EIGEN_COMP_CLANG +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return isinf(double(x)); } +#endif + +#undef EIGEN_TMP_NOOPT_ATTRIB + +#endif + +#endif + template bool (isfinite)(const std::complex& x) { From 
e3031d7bfa5e5f983fd670ee301776e4421a43bf Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 16:43:23 +0100 Subject: [PATCH 129/344] bug #1008: improve handling of fast-math mode for older gcc versions. --- Eigen/src/Core/MathFunctions.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 299a9d61d..6d2730960 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -819,7 +819,7 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) } // std::is* do not work with fast-math and gcc -#if EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && defined __FAST_MATH__) +#if EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__) #define EIGEN_USE_STD_FPCLASSIFY 1 #else #define EIGEN_USE_STD_FPCLASSIFY 0 @@ -870,14 +870,18 @@ template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan( template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(double(x)); } -#elif defined __FAST_MATH__ +#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__) #if EIGEN_COMP_CLANG -#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optnone)) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optnone)) #elif EIGEN_COMP_GNUC -#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-fast-math"))) + #if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-finite-math-only"))) + #else + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((noinline,optimize("no-finite-math-only"))) + #endif #else -#define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC #endif template @@ -887,14 +891,13 @@ bool isinf_helper(const T& x) return 
x>NumTraits::highest() || x::lowest(); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return !(x <= std::numeric_limits::infinity()); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const double& x) { return x!=x; } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const float& x) { return x!=x; } - -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const double& x) { return isinf_helper(x); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const float& x) { return isinf_helper(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const float& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const double& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const float& x) { return __builtin_isinf(x); } #if EIGEN_COMP_CLANG -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return isinf(double(x)); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return __builtin_isinf(double(x)); } #endif #undef EIGEN_TMP_NOOPT_ATTRIB From 946f8850e83c3174d5f0b22f2818c88bf7a33062 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 16:44:45 +0100 Subject: [PATCH 130/344] bug #1008: add a unit test for fast-math mode and isinf/isnan/isfinite/etc. functions. 
--- test/CMakeLists.txt | 12 ++++++ test/fastmath.cpp | 90 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 test/fastmath.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c8a8ba6f4..822ca8f10 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -257,6 +257,18 @@ ei_add_test(dense_storage) ei_add_test(ctorleak) ei_add_test(mpl2only) +check_cxx_compiler_flag("-ffast-math" COMPILER_SUPPORT_FASTMATH) +if(COMPILER_SUPPORT_FASTMATH) + set(EIGEN_FASTMATH_FLAGS "-ffast-math") +else() + check_cxx_compiler_flag("/fp:fast" COMPILER_SUPPORT_FPFAST) + if(COMPILER_SUPPORT_FPFAST) + set(EIGEN_FASTMATH_FLAGS "/fp:fast") + endif() +endif() + +ei_add_test(fastmath " ${EIGEN_FASTMATH_FLAGS} ") + # # ei_add_test(denseLM) if(QT4_FOUND) diff --git a/test/fastmath.cpp b/test/fastmath.cpp new file mode 100644 index 000000000..2911c0544 --- /dev/null +++ b/test/fastmath.cpp @@ -0,0 +1,90 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +void check(bool b, bool ref) +{ + std::cout << b; + if(b==ref) + std::cout << " OK "; + else + std::cout << " BAD "; +} + +template +void check_inf_nan(bool dryrun) { + Matrix m(10); + m.setRandom(); + m(3) = std::numeric_limits::quiet_NaN(); + + if(dryrun) + { + std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), false); std::cout << "\n"; + std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n"; + std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),true); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), true); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 1); std::cout << "\n"; + std::cout << "\n"; + } + else + { + VERIFY( !(numext::isfinite)(m(3)) ); + VERIFY( !(numext::isinf)(m(3)) ); + VERIFY( (numext::isnan)(m(3)) ); + VERIFY( !m.allFinite() ); + VERIFY( m.hasNaN() ); + } + m(4) /= 0.0; + if(dryrun) + { + std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n"; + std::cout << "std::isinf(" << m(4) << ") = "; check((std::isinf)(m(4)),true); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(4)), true); std::cout << "\n"; + std::cout << "std::isnan(" << m(4) << ") = "; check((std::isnan)(m(4)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(4)), false); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 1); std::cout << "\n"; + std::cout << "\n"; + } + else + { + VERIFY( !(numext::isfinite)(m(4)) ); + VERIFY( (numext::isinf)(m(4)) ); + VERIFY( 
!(numext::isnan)(m(4)) ); + VERIFY( !m.allFinite() ); + VERIFY( m.hasNaN() ); + } + m(3) = 0; + if(dryrun) + { + std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),true); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), true); std::cout << "\n"; + std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n"; + std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), false); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 0); std::cout << "\n"; + std::cout << "\n\n"; + } + else + { + VERIFY( (numext::isfinite)(m(3)) ); + VERIFY( !(numext::isinf)(m(3)) ); + VERIFY( !(numext::isnan)(m(3)) ); + VERIFY( !m.allFinite() ); + VERIFY( !m.hasNaN() ); + } +} + +void test_fastmath() { + std::cout << "*** float *** \n\n"; check_inf_nan(true); + std::cout << "*** double ***\n\n"; check_inf_nan(true); + std::cout << "*** long double *** \n\n"; check_inf_nan(true); + + check_inf_nan(false); + check_inf_nan(false); + check_inf_nan(false); +} From d4cf436cb1a8c2af96c9351114195847bc3ff1f1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 17:35:54 +0100 Subject: [PATCH 131/344] Enable mpreal unit test for C++11 compiler only --- unsupported/test/CMakeLists.txt | 4 ++-- unsupported/test/mpreal/mpreal.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 3d82508f7..cc4ce1c59 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -47,11 +47,11 @@ ei_add_test(FFT) find_package(MPFR 2.3.0) find_package(GMP) -if(MPFR_FOUND) +if(MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CXX11) include_directories(${MPFR_INCLUDES} ./mpreal) 
ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ") set(EIGEN_MPFR_TEST_LIBRARIES ${MPFR_LIBRARIES} ${GMP_LIBRARIES}) - ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" ) + ei_add_test(mpreal_support "-std=c++11" "${EIGEN_MPFR_TEST_LIBRARIES}" ) else() ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ") endif() diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h index 9b96ec411..c4f6cf0cb 100644 --- a/unsupported/test/mpreal/mpreal.h +++ b/unsupported/test/mpreal/mpreal.h @@ -107,7 +107,7 @@ #define MPREAL_HAVE_EXPLICIT_CONVERTERS #endif -//#define MPFR_USE_INTMAX_T // Enable 64-bit integer types - should be defined before mpfr.h +#define MPFR_USE_INTMAX_T // Enable 64-bit integer types - should be defined before mpfr.h #if defined(MPREAL_HAVE_MSVC_DEBUGVIEW) && defined(_MSC_VER) && defined(_DEBUG) #define MPREAL_MSVC_DEBUGVIEW_CODE DebugView = toString(); From 827d8a9bad6f6c8a8e0211358b51c60db18a2cfb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Oct 2015 21:37:03 +0100 Subject: [PATCH 132/344] Fix false negative in redux test --- test/redux.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/redux.cpp b/test/redux.cpp index 849faf55e..bfd9a8d50 100644 --- a/test/redux.cpp +++ b/test/redux.cpp @@ -24,7 +24,7 @@ template void matrixRedux(const MatrixType& m) MatrixType m1 = MatrixType::Random(rows, cols); // The entries of m1 are uniformly distributed in [0,1], so m1.prod() is very small. This may lead to test - // failures if we underflow into denormals. Thus, we scale so that entires are close to 1. + // failures if we underflow into denormals. Thus, we scale so that entries are close to 1. 
MatrixType m1_for_prod = MatrixType::Ones(rows, cols) + RealScalar(0.2) * m1; VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1)); @@ -71,7 +71,9 @@ template void matrixRedux(const MatrixType& m) // test nesting complex expression VERIFY_EVALUATION_COUNT( (m1.matrix()*m1.matrix().transpose()).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); - VERIFY_EVALUATION_COUNT( ((m1.matrix()*m1.matrix().transpose())*Scalar(2)).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); + Matrix m2(rows,rows); + m2.setRandom(); + VERIFY_EVALUATION_COUNT( ((m1.matrix()*m1.matrix().transpose())+m2).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); } From 77ff3386b7d28c68c9e277e60f285ae1b3124b47 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 11:42:14 +0100 Subject: [PATCH 133/344] Refactoring of the cost model: - Dynamic is now an invalid value - introduce a HugeCost constant to be used for runtime-cost values or arbitrarily huge cost - add sanity checks for cost values: must be >=0 and not too large This change provides several benefits: - it fixes shortcomings in some cost computations where the Dynamic case was not properly handled. - it simplifies cost computation logic, and should avoid future similar shortcomings. 
- it allows to distinguish between different level of dynamic/huge/infinite cost - it should enable further simplifications in the computation of costs (save compilation time) --- Eigen/src/Core/AssignEvaluator.h | 4 +- Eigen/src/Core/BooleanRedux.h | 8 +-- Eigen/src/Core/CoreEvaluators.h | 50 ++++++++++++++----- Eigen/src/Core/NumTraits.h | 6 +-- Eigen/src/Core/ProductEvaluators.h | 12 +++-- Eigen/src/Core/Redux.h | 8 +-- Eigen/src/Core/Visitor.h | 4 +- Eigen/src/Core/util/Constants.h | 8 +++ Eigen/src/Core/util/StaticAssert.h | 7 ++- Eigen/src/Core/util/XprHelper.h | 16 ++---- Eigen/src/SparseCore/SparseCompressedBase.h | 10 +++- Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 20 ++++++-- Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 14 ++++-- Eigen/src/SparseCore/SparseDenseProduct.h | 12 +++-- Eigen/src/SparseCore/SparseDiagonalProduct.h | 6 +-- Eigen/src/SparseCore/SparseProduct.h | 2 +- Eigen/src/SparseCore/SparseTriangularView.h | 2 +- Eigen/src/SparseCore/SparseVector.h | 8 +-- test/redux.cpp | 1 - test/vectorwiseop.cpp | 5 ++ .../KroneckerProduct/KroneckerTensorProduct.h | 2 +- .../Eigen/src/Skyline/SkylineProduct.h | 2 +- 22 files changed, 139 insertions(+), 68 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index ba7748395..e66cf074f 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -98,10 +98,10 @@ private: enum { UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 
int(PacketSize) : 1), MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic - && int(SrcEvaluator::CoeffReadCost) != Dynamic + && int(SrcEvaluator::CoeffReadCost) < HugeCost && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(SrcEvaluator::CoeffReadCost) != Dynamic + && int(SrcEvaluator::CoeffReadCost) < HugeCost && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) }; diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h index be4cd9b3a..bda9f6966 100644 --- a/Eigen/src/Core/BooleanRedux.h +++ b/Eigen/src/Core/BooleanRedux.h @@ -83,8 +83,8 @@ inline bool DenseBase::all() const typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && Evaluator::CoeffReadCost != Dynamic - && NumTraits::AddCost != Dynamic + && Evaluator::CoeffReadCost < HugeCost + && NumTraits::AddCost < HugeCost && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; Evaluator evaluator(derived()); @@ -109,8 +109,8 @@ inline bool DenseBase::any() const typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && Evaluator::CoeffReadCost != Dynamic - && NumTraits::AddCost != Dynamic + && Evaluator::CoeffReadCost < HugeCost + && NumTraits::AddCost < HugeCost && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; Evaluator evaluator(derived()); diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index c898b2abc..fb0cdc99c 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -137,11 +137,15 @@ struct evaluator > m_outerStride(IsVectorAtCompileTime ? 0 : int(IsRowMajor) ? 
ColsAtCompileTime : RowsAtCompileTime) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const { @@ -327,7 +331,9 @@ struct evaluator > EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) : m_functor(n.functor()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -376,7 +382,10 @@ struct unary_evaluator, IndexBased > EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -449,7 +458,10 @@ struct binary_evaluator, IndexBased, IndexBase : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -502,7 +514,10 @@ struct unary_evaluator, IndexBased> EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_unaryOp(op.functor()), m_argImpl(op.nestedExpression()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -559,6 +574,7 @@ struct mapbase_evaluator : evaluator_base { EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } 
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const @@ -713,7 +729,10 @@ struct evaluator > Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) }; typedef block_evaluator block_evaluator_type; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) {} + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } }; // no direct-access => dispatch to a unary evaluator @@ -831,8 +850,8 @@ struct evaluator > typedef Select XprType; enum { CoeffReadCost = evaluator::CoeffReadCost - + EIGEN_SIZE_MAX(evaluator::CoeffReadCost, - evaluator::CoeffReadCost), + + EIGEN_PLAIN_ENUM_MAX(evaluator::CoeffReadCost, + evaluator::CoeffReadCost), Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, @@ -843,7 +862,9 @@ struct evaluator > : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -965,11 +986,11 @@ struct evaluator > typedef typename ArgType::Scalar InputScalar; typedef typename XprType::Scalar Scalar; enum { - TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(XprType::ColsAtCompileTime) + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) }; typedef typename MemberOp::template Cost CostOpType; enum { - CoeffReadCost = TraversalSize==Dynamic ? Dynamic + CoeffReadCost = TraversalSize==Dynamic ? 
HugeCost : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&HereditaryBits), @@ -979,7 +1000,10 @@ struct evaluator > EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value)); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } typedef typename XprType::CoeffReturnType CoeffReturnType; diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 61ec2f533..1d85dec72 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -157,9 +157,9 @@ struct NumTraits > IsInteger = NumTraits::IsInteger, IsSigned = NumTraits::IsSigned, RequireInitialization = 1, - ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, - AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::AddCost, - MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::MulCost + ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, + AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::AddCost, + MulCost = ArrayType::SizeAtCompileTime==Dynamic ? 
HugeCost : ArrayType::SizeAtCompileTime * NumTraits::MulCost }; static inline RealScalar epsilon() { return NumTraits::epsilon(); } diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 04dc08957..e7677b90c 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -422,7 +422,11 @@ struct product_evaluator, ProductTag, DenseShape, m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. m_innerDim(xpr.lhs().cols()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } // Everything below here is taken from CoeffBasedProduct.h @@ -447,11 +451,11 @@ struct product_evaluator, ProductTag, DenseShape, LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, CoeffReadCost = InnerSize==0 ? NumTraits::ReadCost - : (InnerSize == Dynamic || LhsCoeffReadCost==Dynamic || RhsCoeffReadCost==Dynamic || NumTraits::AddCost==Dynamic || NumTraits::MulCost==Dynamic) ? Dynamic + : InnerSize == Dynamic ? 
HugeCost : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits::AddCost, - Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + Unroll = CoeffReadCost < HugeCost && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, @@ -736,6 +740,8 @@ public: diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) : m_diagImpl(diag), m_matImpl(mat) { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 309898b36..fcf0ba76a 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -51,9 +51,9 @@ public: public: enum { Cost = ( Derived::SizeAtCompileTime == Dynamic - || Derived::CoeffReadCost == Dynamic - || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost == Dynamic) - ) ? Dynamic + || Derived::CoeffReadCost >= HugeCost + || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost >= HugeCost) + ) ? HugeCost : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) @@ -61,7 +61,7 @@ public: public: enum { - Unrolling = Cost != Dynamic && Cost <= UnrollingLimit + Unrolling = Cost < HugeCost && Cost <= UnrollingLimit ? 
CompleteUnrolling : NoUnrolling }; diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index a4e2cebab..f3f15e9e0 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -110,8 +110,8 @@ void DenseBase::visit(Visitor& visitor) const ThisEvaluator thisEval(derived()); enum { unroll = SizeAtCompileTime != Dynamic - && ThisEvaluator::CoeffReadCost != Dynamic - && (SizeAtCompileTime == 1 || internal::functor_traits::Cost != Dynamic) + && ThisEvaluator::CoeffReadCost < HugeCost + && (SizeAtCompileTime == 1 || internal::functor_traits::Cost < HugeCost) && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits::Cost <= EIGEN_UNROLLING_LIMIT }; return internal::visitor_impl::value), \ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES) +// Check that a cost value is positive, and that it stays within a reasonable range +// TODO this check could be enabled for internal debugging only +#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \ + EIGEN_STATIC_ASSERT((C)>=0 && (C)<2*HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE); #endif // EIGEN_STATIC_ASSERT_H diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index cd93b2320..209c73e1e 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -397,28 +397,20 @@ struct transfer_constness template::type> struct nested_eval { enum { - // For the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values. - // the choice of 10000 makes it larger than any practical fixed value and even most dynamic values. - // in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues - // (poor choice of temporaries). - // It's important that this value can still be squared without integer overflowing. 
- DynamicAsInteger = 10000, ScalarReadCost = NumTraits::Scalar>::ReadCost, - ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost), CoeffReadCost = evaluator::CoeffReadCost, // NOTE What if an evaluator evaluate itself into a tempory? // Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1. // This situation is already taken care by the EvalBeforeNestingBit flag, which is turned ON // for all evaluator creating a temporary. This flag is then propagated by the parent evaluators. // Another solution could be to count the number of temps? - CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost), - NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n, - CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger, - CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger + NAsInteger = n == Dynamic ? HugeCost : n, + CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost, + CostNoEval = NAsInteger * CoeffReadCost }; typedef typename conditional< ( (int(evaluator::Flags) & EvalBeforeNestingBit) || - (int(CostEvalAsInteger) < int(CostNoEvalAsInteger)) ), + (int(CostEval) < int(CostNoEval)) ), PlainObject, typename ref_selector::type >::type type; diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index c8a2705f9..fb795a0ed 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -226,8 +226,14 @@ struct evaluator > Flags = Derived::Flags }; - evaluator() : m_matrix(0) {} - explicit evaluator(const Derived &mat) : m_matrix(&mat) {} + evaluator() : m_matrix(0) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + explicit evaluator(const Derived &mat) : m_matrix(&mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_matrix->nonZeros(); diff --git 
a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index b87b6b749..abbbf397b 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -139,7 +139,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_lhsImpl.nonZerosEstimate() + m_rhsImpl.nonZerosEstimate(); @@ -220,7 +223,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return (std::min)(m_lhsImpl.nonZerosEstimate(), m_rhsImpl.nonZerosEstimate()); @@ -289,7 +295,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_rhsImpl.nonZerosEstimate(); @@ -359,7 +368,10 @@ public: : m_functor(xpr.functor()), m_lhsImpl(xpr.lhs()), m_rhsImpl(xpr.rhs()) - { } + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_lhsImpl.nonZerosEstimate(); diff --git a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h index 469bac36e..fe4a97120 100644 --- a/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseUnaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 
2.0. If a copy of the MPL was not distributed @@ -29,7 +29,11 @@ struct unary_evaluator, IteratorBased> Flags = XprType::Flags }; - explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {} + explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_argImpl.nonZerosEstimate(); @@ -108,7 +112,11 @@ struct unary_evaluator, IteratorBased> Flags = XprType::Flags }; - explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) {} + explicit unary_evaluator(const XprType& op) : m_functor(op.functor()), m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } protected: typedef typename evaluator::InnerIterator EvalIterator; diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 2e34ae74c..87c946b9b 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -221,7 +221,7 @@ protected: public: enum { Flags = NeedToTranspose ? 
RowMajorBit : 0, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; class InnerIterator : public LhsIterator @@ -263,12 +263,16 @@ public: sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs) : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } // transpose case sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs) : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) - {} + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } protected: const LhsArg m_lhs; diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h index cf31e5a53..e4af49e09 100644 --- a/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009-2014 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -39,7 +39,7 @@ struct product_evaluator, ProductTag, Diagonal : public sparse_diagonal_product_evaluator { typedef Product XprType; - enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags + enum { CoeffReadCost = HugeCost, Flags = Rhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags typedef sparse_diagonal_product_evaluator Base; explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {} @@ -50,7 +50,7 @@ struct product_evaluator, ProductTag, SparseSh : public sparse_diagonal_product_evaluator, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> { typedef Product XprType; - enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags + enum { CoeffReadCost = HugeCost, Flags = Lhs::Flags&RowMajorBit, Alignment = 0 }; // FIXME CoeffReadCost & Flags typedef sparse_diagonal_product_evaluator, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> Base; explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal().transpose()) {} diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index ea2c3a8a3..cbd0db71b 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h index 57d88893e..7c718e4e1 100644 --- a/Eigen/src/SparseCore/SparseTriangularView.h +++ b/Eigen/src/SparseCore/SparseTriangularView.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009-2014 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // Copyright (C) 2012 Désiré Nuentsa-Wakam // // This Source Code Form is subject to the terms of the Mozilla diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index 94f8d0341..7ec73a365 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -41,7 +41,6 @@ struct traits > MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, Flags = _Options | NestByRefBit | LvalueBit | (IsColVector ? 
0 : RowMajorBit) | CompressedAccessBit, - CoeffReadCost = NumTraits::ReadCost, SupportedAccessPatterns = InnerRandomAccessPattern }; }; @@ -380,7 +379,10 @@ struct evaluator > Flags = SparseVectorType::Flags }; - explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) {} + explicit evaluator(const SparseVectorType &mat) : m_matrix(mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } inline Index nonZerosEstimate() const { return m_matrix.nonZeros(); diff --git a/test/redux.cpp b/test/redux.cpp index bfd9a8d50..6ddc59c18 100644 --- a/test/redux.cpp +++ b/test/redux.cpp @@ -74,7 +74,6 @@ template void matrixRedux(const MatrixType& m) Matrix m2(rows,rows); m2.setRandom(); VERIFY_EVALUATION_COUNT( ((m1.matrix()*m1.matrix().transpose())+m2).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); - } template void vectorRedux(const VectorType& w) diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp index ddd9f8389..529f4298b 100644 --- a/test/vectorwiseop.cpp +++ b/test/vectorwiseop.cpp @@ -217,6 +217,11 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX( (m1 * m1.transpose()).colwise().sum(), m1m1.colwise().sum()); Matrix tmp(rows); VERIFY_EVALUATION_COUNT( tmp = (m1 * m1.transpose()).colwise().sum(), (MatrixType::RowsAtCompileTime==Dynamic ? 1 : 0)); + + m2 = m1.rowwise() - (m1.colwise().sum()/m1.rows()).eval(); + m1 = m1.rowwise() - (m1.colwise().sum()/m1.rows()); + VERIFY_IS_APPROX( m1, m2 ); + VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/m1.rows()), (MatrixType::RowsAtCompileTime==Dynamic ? 
1 : 0) ); } void test_vectorwiseop() diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h index 4406437cc..4d3e5358e 100644 --- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h @@ -240,7 +240,7 @@ struct traits > Flags = ((LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) | EvalBeforeNestingBit | EvalBeforeAssigningBit, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; typedef SparseMatrix ReturnType; diff --git a/unsupported/Eigen/src/Skyline/SkylineProduct.h b/unsupported/Eigen/src/Skyline/SkylineProduct.h index d218a7c25..d9eb814c1 100644 --- a/unsupported/Eigen/src/Skyline/SkylineProduct.h +++ b/unsupported/Eigen/src/Skyline/SkylineProduct.h @@ -49,7 +49,7 @@ struct internal::traits > { | EvalBeforeAssigningBit | EvalBeforeNestingBit, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; typedef typename internal::conditional Date: Wed, 28 Oct 2015 11:59:20 +0100 Subject: [PATCH 134/344] Extend vectorwiseop unit test with column/row vectors as input. 
--- test/vectorwiseop.cpp | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp index 529f4298b..87476f95b 100644 --- a/test/vectorwiseop.cpp +++ b/test/vectorwiseop.cpp @@ -158,16 +158,22 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX(m2, m1.colwise() + colvec); VERIFY_IS_APPROX(m2.col(c), m1.col(c) + colvec); - VERIFY_RAISES_ASSERT(m2.colwise() += colvec.transpose()); - VERIFY_RAISES_ASSERT(m1.colwise() + colvec.transpose()); + if(rows>1) + { + VERIFY_RAISES_ASSERT(m2.colwise() += colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() + colvec.transpose()); + } m2 = m1; m2.rowwise() += rowvec; VERIFY_IS_APPROX(m2, m1.rowwise() + rowvec); VERIFY_IS_APPROX(m2.row(r), m1.row(r) + rowvec); - VERIFY_RAISES_ASSERT(m2.rowwise() += rowvec.transpose()); - VERIFY_RAISES_ASSERT(m1.rowwise() + rowvec.transpose()); + if(cols>1) + { + VERIFY_RAISES_ASSERT(m2.rowwise() += rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() + rowvec.transpose()); + } // test substraction @@ -176,16 +182,22 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX(m2, m1.colwise() - colvec); VERIFY_IS_APPROX(m2.col(c), m1.col(c) - colvec); - VERIFY_RAISES_ASSERT(m2.colwise() -= colvec.transpose()); - VERIFY_RAISES_ASSERT(m1.colwise() - colvec.transpose()); + if(rows>1) + { + VERIFY_RAISES_ASSERT(m2.colwise() -= colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() - colvec.transpose()); + } m2 = m1; m2.rowwise() -= rowvec; VERIFY_IS_APPROX(m2, m1.rowwise() - rowvec); VERIFY_IS_APPROX(m2.row(r), m1.row(r) - rowvec); - VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose()); - VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose()); + if(cols>1) + { + VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose()); + } // test norm rrres = m1.colwise().norm(); @@ -221,7 +233,7 @@ template void 
vectorwiseop_matrix(const MatrixType& m) m2 = m1.rowwise() - (m1.colwise().sum()/m1.rows()).eval(); m1 = m1.rowwise() - (m1.colwise().sum()/m1.rows()); VERIFY_IS_APPROX( m1, m2 ); - VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/m1.rows()), (MatrixType::RowsAtCompileTime==Dynamic ? 1 : 0) ); + VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/m1.rows()), (MatrixType::RowsAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime!=1 ? 1 : 0) ); } void test_vectorwiseop() @@ -232,4 +244,6 @@ void test_vectorwiseop() CALL_SUBTEST_4( vectorwiseop_matrix(Matrix4cf()) ); CALL_SUBTEST_5( vectorwiseop_matrix(Matrix()) ); CALL_SUBTEST_6( vectorwiseop_matrix(MatrixXd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( vectorwiseop_matrix(VectorXd(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( vectorwiseop_matrix(RowVectorXd(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } From 1f11dd6cedc223f92f9ce99a22080dd267fcb488 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 12:53:13 +0100 Subject: [PATCH 135/344] Add a unit test for large chains of products --- test/product_large.cpp | 11 +++++++++++ test/product_small.cpp | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/test/product_large.cpp b/test/product_large.cpp index 84c489580..7207973c2 100644 --- a/test/product_large.cpp +++ b/test/product_large.cpp @@ -61,6 +61,17 @@ void test_product_large() MatrixXf r2 = mat1.row(2)*mat2; VERIFY_IS_APPROX(r2, (mat1.row(2)*mat2).eval()); } + + { + Eigen::MatrixXd A(10,10), B, C; + A.setRandom(); + C = A; + for(int k=0; k<79; ++k) + C = C * A; + B.noalias() = (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))) + * (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))); + 
VERIFY_IS_APPROX(B,C); + } #endif // Regression test for bug 714: diff --git a/test/product_small.cpp b/test/product_small.cpp index 091955a0f..c561ec63b 100644 --- a/test/product_small.cpp +++ b/test/product_small.cpp @@ -56,5 +56,16 @@ void test_product_small() VERIFY_IS_APPROX(B * A.inverse(), B * A.inverse()[0]); VERIFY_IS_APPROX(A.inverse() * C, A.inverse()[0] * C); } + + { + Eigen::Matrix A, B, C; + A.setRandom(); + C = A; + for(int k=0; k<79; ++k) + C = C * A; + B.noalias() = (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))) + * (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))); + VERIFY_IS_APPROX(B,C); + } #endif } From 85313048581d22901c7940a46bd41b19e88ff47c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 13:39:02 +0100 Subject: [PATCH 136/344] Simplify cost computations based on HugeCost being smaller that unrolling limit --- Eigen/src/Core/AssignEvaluator.h | 2 -- Eigen/src/Core/BooleanRedux.h | 4 ---- Eigen/src/Core/ProductEvaluators.h | 2 +- Eigen/src/Core/Redux.h | 12 +++--------- Eigen/src/Core/Visitor.h | 13 +++++-------- Eigen/src/Core/util/Constants.h | 2 +- Eigen/src/Core/util/StaticAssert.h | 2 +- 7 files changed, 11 insertions(+), 26 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index e66cf074f..db3bef38d 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -98,10 +98,8 @@ private: enum { UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 
int(PacketSize) : 1), MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic - && int(SrcEvaluator::CoeffReadCost) < HugeCost && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(SrcEvaluator::CoeffReadCost) < HugeCost && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) }; diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h index bda9f6966..8409d8749 100644 --- a/Eigen/src/Core/BooleanRedux.h +++ b/Eigen/src/Core/BooleanRedux.h @@ -83,8 +83,6 @@ inline bool DenseBase::all() const typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && Evaluator::CoeffReadCost < HugeCost - && NumTraits::AddCost < HugeCost && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; Evaluator evaluator(derived()); @@ -109,8 +107,6 @@ inline bool DenseBase::any() const typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && Evaluator::CoeffReadCost < HugeCost - && NumTraits::AddCost < HugeCost && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; Evaluator evaluator(derived()); diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index e7677b90c..2927fcc0e 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -455,7 +455,7 @@ struct product_evaluator, ProductTag, DenseShape, : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + (InnerSize - 1) * NumTraits::AddCost, - Unroll = CoeffReadCost < HugeCost && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, LhsFlags = LhsEtorType::Flags, RhsFlags = RhsEtorType::Flags, diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index fcf0ba76a..d170cae29 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ 
-50,20 +50,14 @@ public: public: enum { - Cost = ( Derived::SizeAtCompileTime == Dynamic - || Derived::CoeffReadCost >= HugeCost - || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost >= HugeCost) - ) ? HugeCost - : Derived::SizeAtCompileTime * Derived::CoeffReadCost - + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, + Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost + : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) }; public: enum { - Unrolling = Cost < HugeCost && Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling + Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling }; #ifdef EIGEN_DEBUG_ASSIGN diff --git a/Eigen/src/Core/Visitor.h b/Eigen/src/Core/Visitor.h index f3f15e9e0..7aac0b6e1 100644 --- a/Eigen/src/Core/Visitor.h +++ b/Eigen/src/Core/Visitor.h @@ -109,14 +109,11 @@ void DenseBase::visit(Visitor& visitor) const typedef typename internal::visitor_evaluator ThisEvaluator; ThisEvaluator thisEval(derived()); - enum { unroll = SizeAtCompileTime != Dynamic - && ThisEvaluator::CoeffReadCost < HugeCost - && (SizeAtCompileTime == 1 || internal::functor_traits::Cost < HugeCost) - && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits::Cost - <= EIGEN_UNROLLING_LIMIT }; - return internal::visitor_impl::run(thisEval, visitor); + enum { + unroll = SizeAtCompileTime != Dynamic + && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits::Cost <= EIGEN_UNROLLING_LIMIT + }; + return internal::visitor_impl::run(thisEval, visitor); } namespace internal { diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 12238e5dd..c35077af6 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -36,7 +36,7 @@ const int 
Infinity = -1; * This value has to be positive to (1) simplify cost computation, and (2) allow to distinguish between a very expensive and very very expensive expressions. * It thus must also be large enough to make sure unrolling won't happen and that sub expressions will be evaluated, but not too large to avoid overflow. */ -const int HugeCost = 1000; +const int HugeCost = 10000; /** \defgroup flags Flags * \ingroup Core_Module diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 77da6cc5f..9d7302d81 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -204,6 +204,6 @@ // Check that a cost value is positive, and that is stay within a reasonable range // TODO this check could be enabled for internal debugging only #define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \ - EIGEN_STATIC_ASSERT((C)>=0 && (C)<2*HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE); + EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE); #endif // EIGEN_STATIC_ASSERT_H From 1a842c0dc441cca1bd66a516b16d7fe6a4c0ba26 Mon Sep 17 00:00:00 2001 From: Ilya Popov Date: Wed, 28 Oct 2015 09:52:55 +0000 Subject: [PATCH 137/344] Fix typo in TutorialSparse: laplace equation contains gradient symbol (\nabla) instead of laplacian (\Delta). --- doc/TutorialSparse.dox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/TutorialSparse.dox b/doc/TutorialSparse.dox index 835c59354..fb07adaa2 100644 --- a/doc/TutorialSparse.dox +++ b/doc/TutorialSparse.dox @@ -83,7 +83,7 @@ There is no notion of compressed/uncompressed mode for a SparseVector. \section TutorialSparseExample First example -Before describing each individual class, let's start with the following typical example: solving the Laplace equation \f$ \nabla u = 0 \f$ on a regular 2D grid using a finite difference scheme and Dirichlet boundary conditions. 
+Before describing each individual class, let's start with the following typical example: solving the Laplace equation \f$ \Delta u = 0 \f$ on a regular 2D grid using a finite difference scheme and Dirichlet boundary conditions. Such problem can be mathematically expressed as a linear problem of the form \f$ Ax=b \f$ where \f$ x \f$ is the vector of \c m unknowns (in our case, the values of the pixels), \f$ b \f$ is the right hand side vector resulting from the boundary conditions, and \f$ A \f$ is an \f$ m \times m \f$ matrix containing only a few non-zero elements resulting from the discretization of the Laplacian operator.

ClassModuleSolver kindMatrix kindFeatures related to performance Dependencies,License

Notes

SimplicialLLT \link SparseCholesky_Module SparseCholesky \endlinkDirect LLt factorizationSPDFill-in reducingbuilt-in, LGPLSimplicialLDLT is often preferable
SimplicialLDLT \link SparseCholesky_Module SparseCholesky \endlinkDirect LDLt factorizationSPDFill-in reducingbuilt-in, LGPLRecommended for very sparse and not too large problems (e.g., 2D Poisson eq.)
ConjugateGradient\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkClassic iterative CGSPDPreconditionningbuilt-in, MPL2Recommended for large symmetric problems (e.g., 3D Poisson eq.)
LeastSquaresConjugateGradient\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkCG for rectangular least-square problemRectangularPreconditionningbuilt-in, MPL2Solve for min |A'Ax-b|^2 without forming A'A
BiCGSTAB\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkIterative stabilized bi-conjugate gradientSquarePreconditionningbuilt-in, MPL2To speedup the convergence, try it with the \ref IncompleteLUT preconditioner.
SparseLU \link SparseLU_Module SparseLU \endlink LU factorization Square Fill-in reducing, Leverage fast dense algebra built-in, MPL2 optimized for small and large problems with irregular patterns
SparseQR \link SparseQR_Module SparseQR \endlink QR factorizationAny, rectangular Fill-in reducingbuilt-in, MPL2recommended for least-square problems, has a basic rank-revealing feature
Wrappers to external solvers
PastixLLT \n PastixLDLT \n PastixLU\link PaStiXSupport_Module PaStiXSupport \endlinkDirect LLt, LDLt, LU factorizationsSPD \n SPD \n SquareFill-in reducing, Leverage fast dense algebra, Multithreading Requires the PaStiX package, \b CeCILL-C optimized for tough problems and symmetric patterns
From 28ddb5158dbe2a633a11f77ad7145ceae08abbf3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 16:27:20 +0100 Subject: [PATCH 138/344] Enable std::isfinite/nan/inf on MSVC 2013 and newer and clang. Fix isinf for gcc4.4 and older msvc with fast-math. --- Eigen/src/Core/MathFunctions.h | 50 +++++++++++++++------------------- test/fastmath.cpp | 8 ++++++ 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 6d2730960..515eca137 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -818,8 +818,8 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); } -// std::is* do not work with fast-math and gcc -#if EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__) +// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. 
+#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) #define EIGEN_USE_STD_FPCLASSIFY 1 #else #define EIGEN_USE_STD_FPCLASSIFY 0 @@ -865,40 +865,34 @@ bool (isnan)(const T& x) #if EIGEN_COMP_MSVC -//MSVC defines a _isnan builtin function, but for double only -template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan(double(x)); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(double(x)); } - -#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__) - -#if EIGEN_COMP_CLANG - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optnone)) -#elif EIGEN_COMP_GNUC - #if EIGEN_GNUC_AT_LEAST(5,0) - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-finite-math-only"))) - #else - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((noinline,optimize("no-finite-math-only"))) - #endif -#else - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC -#endif - -template -EIGEN_TMP_NOOPT_ATTRIB -bool isinf_helper(const T& x) +template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) { - return x>NumTraits::highest() || x::lowest(); + return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; } +//MSVC defines a _isnan builtin function, but for double only +template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(x); } + +template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return isinf_msvc_helper(x); } + +#elif (defined __FINITE_MATH_ONLY__ && 
__FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) + +#if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-finite-math-only"))) +#else + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((noinline,optimize("no-finite-math-only"))) +#endif + template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return __builtin_isnan(x); } template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const double& x) { return __builtin_isnan(x); } template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const float& x) { return __builtin_isnan(x); } template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const double& x) { return __builtin_isinf(x); } template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const float& x) { return __builtin_isinf(x); } -#if EIGEN_COMP_CLANG -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return __builtin_isinf(double(x)); } -#endif +template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return __builtin_isinf(x); } #undef EIGEN_TMP_NOOPT_ATTRIB diff --git a/test/fastmath.cpp b/test/fastmath.cpp index 2911c0544..16462d54f 100644 --- a/test/fastmath.cpp +++ b/test/fastmath.cpp @@ -18,6 +18,14 @@ void check(bool b, bool ref) std::cout << " BAD "; } +#if EIGEN_COMP_MSVC < 1800 +namespace std { + template bool (isfinite)(T x) { return _finite(x); } + template bool (isnan)(T x) { return _isnan(x); } + template bool (isinf)(T x) { return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; } +} +#endif + template void check_inf_nan(bool dryrun) { Matrix m(10); From 6759a21e49614dd7cd977e3c1fd782458f33fc9f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 16:49:15 +0100 Subject: [PATCH 139/344] CUDA support: define more accurate min/max values for device::numeric_limits of float and double using values from cfloat header --- Eigen/src/Core/util/Meta.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/util/Meta.h 
b/Eigen/src/Core/util/Meta.h index 6eb409194..ef35cefb4 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -11,6 +11,10 @@ #ifndef EIGEN_META_H #define EIGEN_META_H +#if defined(__CUDA_ARCH__) +#include +#endif + namespace Eigen { namespace internal { @@ -138,16 +142,16 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static float (max)() { return CUDART_MAX_NORMAL_F; } EIGEN_DEVICE_FUNC - static float (min)() { return __FLT_EPSILON__; } + static float (min)() { return FLT_MIN; } }; template<> struct numeric_limits { EIGEN_DEVICE_FUNC static double epsilon() { return __DBL_EPSILON__; } EIGEN_DEVICE_FUNC - static double (max)() { return CUDART_INF; } + static double (max)() { return DBL_MAX; } EIGEN_DEVICE_FUNC - static double (min)() { return __DBL_EPSILON__; } + static double (min)() { return DBL_MIN; } }; template<> struct numeric_limits { From 5b6cff5b0e7dcb7b36b44c61263da7e281800799 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 20:18:00 +0100 Subject: [PATCH 140/344] fix typo --- test/fastmath.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fastmath.cpp b/test/fastmath.cpp index 16462d54f..efdd5b313 100644 --- a/test/fastmath.cpp +++ b/test/fastmath.cpp @@ -18,7 +18,7 @@ void check(bool b, bool ref) std::cout << " BAD "; } -#if EIGEN_COMP_MSVC < 1800 +#if EIGEN_COMP_MSVC && EIGEN_COMP_MSVC < 1800 namespace std { template bool (isfinite)(T x) { return _finite(x); } template bool (isnan)(T x) { return _isnan(x); } From c688cc28d631c84659d5b0931957f8772c55b230 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 28 Oct 2015 20:20:05 +0100 Subject: [PATCH 141/344] fix copy/paste typo --- Eigen/src/Core/MathFunctions.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 515eca137..1820fc1c8 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -875,9 +875,9 @@ 
template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan( template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isinf)(const long double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isinf)(const double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC bool (isinf)(const float& x) { return isinf_msvc_helper(x); } #elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) From 7a5f83ca60c667898e6d8096dac5680793ddfce9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 29 Oct 2015 03:55:39 -0700 Subject: [PATCH 142/344] Add overloads for real times sparse operations. This avoids real to complex conversions, and also fixes a compilation issue with MSVC. 
--- Eigen/src/Core/ArrayBase.h | 4 +--- Eigen/src/Core/DenseBase.h | 15 ++++++--------- Eigen/src/Core/util/XprHelper.h | 8 ++++---- Eigen/src/SparseCore/SparseMatrixBase.h | 16 ++++++++++++++-- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 151c05526..66813c8ea 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -46,15 +46,13 @@ template class ArrayBase typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; - using internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real>::operator*; - typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; typedef DenseBase Base; + using Base::operator*; using Base::RowsAtCompileTime; using Base::ColsAtCompileTime; using Base::SizeAtCompileTime; diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 488f15061..e181dafaf 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -40,18 +40,14 @@ static inline void check_DenseIndex_is_signed() { */ template class DenseBase #ifndef EIGEN_PARSED_BY_DOXYGEN - : public internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real> + : public internal::special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real, + DenseCoeffsBase > #else : public DenseCoeffsBase #endif // not EIGEN_PARSED_BY_DOXYGEN { public: - using internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real>::operator*; - using internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real>::operator/; - /** Inner iterator type to iterate over the coefficients of a row or column. 
* \sa class InnerIterator @@ -77,9 +73,10 @@ template class DenseBase typedef Scalar value_type; typedef typename NumTraits::Real RealScalar; + typedef internal::special_scalar_op_base > Base; - typedef internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real> Base; + using Base::operator*; + using Base::operator/; using Base::derived; using Base::const_cast_derived; using Base::rows; diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 209c73e1e..f9e2959cc 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -452,9 +452,9 @@ struct generic_xpr_base /** \internal Helper base class to add a scalar multiple operator * overloads for complex types */ -template::value > -struct special_scalar_op_base : public DenseCoeffsBase +struct special_scalar_op_base : public BaseType { // dummy operator* so that the // "using special_scalar_op_base::operator*" compiles @@ -463,8 +463,8 @@ struct special_scalar_op_base : public DenseCoeffsBase void operator/(dummy) const; }; -template -struct special_scalar_op_base : public DenseCoeffsBase +template +struct special_scalar_op_base : public BaseType { const CwiseUnaryOp, Derived> operator*(const OtherScalar& scalar) const diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 38eb1c37a..74b498a47 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -23,7 +23,14 @@ namespace Eigen { * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEMATRIXBASE_PLUGIN. 
*/ -template class SparseMatrixBase : public EigenBase +template class SparseMatrixBase +#ifndef EIGEN_PARSED_BY_DOXYGEN + : public internal::special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real, + EigenBase > +#else + : public EigenBase +#endif // not EIGEN_PARSED_BY_DOXYGEN { public: @@ -42,7 +49,12 @@ template class SparseMatrixBase : public EigenBase >::type PacketReturnType; typedef SparseMatrixBase StorageBaseType; - typedef EigenBase Base; + typedef typename NumTraits::Real RealScalar; + typedef internal::special_scalar_op_base > Base; + + using Base::operator*; + using Base::operator/; + typedef Matrix IndexVector; typedef Matrix ScalarVector; From 568d488a2778b6e539a417b1ab2b62d1a096784e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 29 Oct 2015 13:16:15 +0100 Subject: [PATCH 143/344] Fusion the two similar specialization of Sparse2Dense Assignment. This change also fixes a compilation issue with MSVC<=2013. --- Eigen/src/SparseCore/SparseAssign.h | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index 4b663a59e..cb154d1c2 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -136,9 +136,13 @@ struct Assignment template< typename DstXprType, typename SrcXprType, typename Functor> struct Assignment { + typedef typename DstXprType::Scalar Scalar; static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + if(internal::is_same >::value) + dst.setZero(); internal::evaluator srcEval(src); internal::evaluator dstEval(dst); @@ -149,23 +153,6 @@ struct Assignment } }; -template< typename DstXprType, typename SrcXprType> -struct Assignment, Sparse2Dense> -{ - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) - { - eigen_assert(dst.rows() == src.rows() && dst.cols() 
== src.cols()); - - dst.setZero(); - internal::evaluator srcEval(src); - internal::evaluator dstEval(dst); - const Index outerEvaluationSize = (internal::evaluator::Flags&RowMajorBit) ? src.rows() : src.cols(); - for (Index j=0; j::InnerIterator i(srcEval,j); i; ++i) - dstEval.coeffRef(i.row(),i.col()) = i.value(); - } -}; - // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error template From 7cfbe35e49fd43d646d2aecf7b93630b3916f2f8 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 29 Oct 2015 21:05:52 +0100 Subject: [PATCH 144/344] Fix duplicated declaration --- Eigen/src/SparseCore/SparseMatrixBase.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 74b498a47..ff417302f 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -49,11 +49,6 @@ template class SparseMatrixBase >::type PacketReturnType; typedef SparseMatrixBase StorageBaseType; - typedef typename NumTraits::Real RealScalar; - typedef internal::special_scalar_op_base > Base; - - using Base::operator*; - using Base::operator/; typedef Matrix IndexVector; typedef Matrix ScalarVector; @@ -146,6 +141,10 @@ template class SparseMatrixBase inline Derived& derived() { return *static_cast(this); } inline Derived& const_cast_derived() const { return *static_cast(const_cast(this)); } + + typedef internal::special_scalar_op_base > Base; + using Base::operator*; + using Base::operator/; #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::SparseMatrixBase From 05a0ee25dfcf5f16347e7e8a8903f91b89d48166 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 29 Oct 2015 21:06:07 +0100 Subject: [PATCH 145/344] Fix warning. 
--- doc/special_examples/random_cpp11.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/special_examples/random_cpp11.cpp b/doc/special_examples/random_cpp11.cpp index ccd7c77d0..adc3c110c 100644 --- a/doc/special_examples/random_cpp11.cpp +++ b/doc/special_examples/random_cpp11.cpp @@ -7,7 +7,7 @@ using namespace Eigen; int main() { std::default_random_engine generator; std::poisson_distribution distribution(4.1); - auto poisson = [&] (int) {return distribution(generator);}; + auto poisson = [&] (Eigen::Index) {return distribution(generator);}; RowVectorXi v = RowVectorXi::NullaryExpr(10, poisson ); std::cout << v << "\n"; From ac142773a74120d65b323704a902761a9b7375aa Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 13:13:39 -0700 Subject: [PATCH 146/344] Don't call internal::check_rows_cols_for_overflow twice in PlainObjectBase::resize since this is extremely expensive for small arrays --- Eigen/src/Core/PlainObjectBase.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 48e29ebdc..6f1350dc0 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -263,7 +263,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type m_storage.resize(size, rows, cols); if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED #else - internal::check_rows_cols_for_overflow::run(rows, cols); m_storage.resize(rows*cols, rows, cols); #endif } From 0974a579101b50dc87a1a3f39cdaab37b2dcad23 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 15:00:06 -0700 Subject: [PATCH 147/344] Silenced compiler warning --- unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 89e28bdb5..145ca0d64 100644 --- 
a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -402,7 +402,7 @@ template struct sizes_match_below_dim { - static inline bool run(Dims1& dims1, Dims2& dims2) { + static inline bool run(Dims1&, Dims2&) { return false; } }; From 09ea3a7acdfa1d382de7264b080921205087ce57 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 16:22:52 -0700 Subject: [PATCH 148/344] Silenced a few more compilation warnings --- unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h index 96a7d5c20..7cd04a99e 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h @@ -354,7 +354,7 @@ struct h_array_reduce template struct h_array_reduce { - constexpr static inline T run(const std::array& arr, T identity) + constexpr static inline T run(const std::array&, T identity) { return identity; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index 62f5ff923..10def5349 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -219,8 +219,8 @@ struct TensorEvaluator, D ComplexScalar* pos_j_base_powered = is_power_of_two ? 
NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); if (!is_power_of_two) { ComplexScalar pos_j_base = ComplexScalar(std::cos(M_PI/line_len), std::sin(M_PI/line_len)); - for (int i = 0; i < line_len + 1; ++i) { - pos_j_base_powered[i] = std::pow(pos_j_base, i * i); + for (int j = 0; j < line_len + 1; ++j) { + pos_j_base_powered[j] = std::pow(pos_j_base, j * j); } } From c444a0a8c3925ed07dc639259d616e771b28aef0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 16:39:47 -0700 Subject: [PATCH 149/344] Consistently use the same index type in the fft codebase. --- .../Eigen/CXX11/src/Tensor/TensorFFT.h | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index 10def5349..215a4ebad 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -200,7 +200,7 @@ struct TensorEvaluator, D const bool write_to_out = internal::is_same::value; ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); - for (int i = 0; i < m_size; ++i) { + for (Index i = 0; i < m_size; ++i) { buf[i] = MakeComplex::value>()(m_impl.coeff(i)); } @@ -211,15 +211,15 @@ struct TensorEvaluator, D eigen_assert(line_len >= 1); ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); const bool is_power_of_two = isPowerOfTwo(line_len); - const int good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); - const int log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); + const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); + const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); ComplexScalar* a = is_power_of_two ? 
NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); if (!is_power_of_two) { ComplexScalar pos_j_base = ComplexScalar(std::cos(M_PI/line_len), std::sin(M_PI/line_len)); - for (int j = 0; j < line_len + 1; ++j) { + for (Index j = 0; j < line_len + 1; ++j) { pos_j_base_powered[j] = std::pow(pos_j_base, j * j); } } @@ -228,7 +228,7 @@ struct TensorEvaluator, D Index base_offset = getBaseOffsetFromIndex(partial_index, dim); // get data into line_buf - for (int j = 0; j < line_len; ++j) { + for (Index j = 0; j < line_len; ++j) { Index offset = getIndexFromOffset(base_offset, dim, j); line_buf[j] = buf[offset]; } @@ -242,7 +242,7 @@ struct TensorEvaluator, D } // write back - for (int j = 0; j < line_len; ++j) { + for (Index j = 0; j < line_len; ++j) { const ComplexScalar div_factor = (FFTDir == FFT_FORWARD) ? 
ComplexScalar(1, 0) : ComplexScalar(line_len, 0); Index offset = getIndexFromOffset(base_offset, dim, j); buf[offset] = line_buf[j] / div_factor; @@ -257,45 +257,45 @@ struct TensorEvaluator, D } if(!write_to_out) { - for (int i = 0; i < m_size; ++i) { + for (Index i = 0; i < m_size; ++i) { data[i] = PartOf()(buf[i]); } m_device.deallocate(buf); } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(int x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) { eigen_assert(x > 0); return !(x & (x - 1)); } // The composite number for padding, used in Bluestein's FFT algorithm - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int findGoodComposite(int n) { - int i = 2; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) { + Index i = 2; while (i < 2 * n - 1) i *= 2; return i; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int getLog2(int m) { - int log2m = 0; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) { + Index log2m = 0; while (m >>= 1) log2m++; return log2m; } // Call Cooley Tukey algorithm directly, data length must be power of 2 - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, int line_len, int log_len) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) { eigen_assert(isPowerOfTwo(line_len)); scramble_FFT(line_buf, line_len); compute_1D_Butterfly(line_buf, line_len, log_len); } // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, int line_len, int good_composite, int log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { - int n = line_len; - int m = good_composite; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, 
Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { + Index n = line_len; + Index m = good_composite; ComplexScalar* data = line_buf; - for (int i = 0; i < n; ++i) { + for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) { a[i] = data[i] * std::conj(pos_j_base_powered[i]); } @@ -303,11 +303,11 @@ struct TensorEvaluator, D a[i] = data[i] * pos_j_base_powered[i]; } } - for (int i = n; i < m; ++i) { + for (Index i = n; i < m; ++i) { a[i] = ComplexScalar(0, 0); } - for (int i = 0; i < n; ++i) { + for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) { b[i] = pos_j_base_powered[i]; } @@ -315,10 +315,10 @@ struct TensorEvaluator, D b[i] = std::conj(pos_j_base_powered[i]); } } - for (int i = n; i < m - n; ++i) { + for (Index i = n; i < m - n; ++i) { b[i] = ComplexScalar(0, 0); } - for (int i = m - n; i < m; ++i) { + for (Index i = m - n; i < m; ++i) { if(FFTDir == FFT_FORWARD) { b[i] = pos_j_base_powered[m-i]; } @@ -333,7 +333,7 @@ struct TensorEvaluator, D scramble_FFT(b, m); compute_1D_Butterfly(b, m, log_len); - for (int i = 0; i < m; ++i) { + for (Index i = 0; i < m; ++i) { a[i] *= b[i]; } @@ -341,11 +341,11 @@ struct TensorEvaluator, D compute_1D_Butterfly(a, m, log_len); //Do the scaling after ifft - for (int i = 0; i < m; ++i) { + for (Index i = 0; i < m; ++i) { a[i] /= m; } - for (int i = 0; i < n; ++i) { + for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) { data[i] = a[i] * std::conj(pos_j_base_powered[i]); } @@ -355,14 +355,14 @@ struct TensorEvaluator, D } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, int n) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) { eigen_assert(isPowerOfTwo(n)); - int j = 1; - for (int i = 1; i < n; ++i){ + Index j = 1; + for (Index i = 1; i < n; ++i){ if (j > i) { std::swap(data[j-1], data[i-1]); } - int m = n >> 1; + Index m = n >> 1; while (m >= 2 
&& j > m) { j -= m; m >>= 1; @@ -372,7 +372,7 @@ struct TensorEvaluator, D } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(ComplexScalar* data, int n, int n_power_of_2) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(ComplexScalar* data, Index n, Index n_power_of_2) { eigen_assert(isPowerOfTwo(n)); if (n == 1) { return; @@ -467,7 +467,7 @@ struct TensorEvaluator, D const ComplexScalar wp(wtemp, wpi); ComplexScalar w(1.0, 0.0); - for(int i = 0; i < n/2; i++) { + for(Index i = 0; i < n/2; i++) { ComplexScalar temp(data[i + n/2] * w); data[i + n/2] = data[i] - temp; data[i] += temp; @@ -490,7 +490,7 @@ struct TensorEvaluator, D result += index; } else { - for (int i = 0; i < omitted_dim; ++i) { + for (Index i = 0; i < omitted_dim; ++i) { const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; const Index idx = index / partial_m_stride; index -= idx * partial_m_stride; @@ -508,7 +508,7 @@ struct TensorEvaluator, D } protected: - int m_size; + Index m_size; const FFT& m_fft; Dimensions m_dimensions; array m_strides; From 1b0685d09ae3529adf4f943b13e45fe193cca26a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 17:27:38 -0700 Subject: [PATCH 150/344] Added support for rank-0 tensors --- unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index be0b07cdf..902f25247 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -319,7 +319,7 @@ struct TensorEvaluator(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), 
YOU_MADE_A_PROGRAMMING_MISTAKE); eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); } From 0d7a23d34e9e898f538220816704e5add4b03f17 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 17:29:49 -0700 Subject: [PATCH 151/344] Extended the reduction code so that reducing an empty set returns the neural element for the operation --- .../Eigen/CXX11/src/Core/util/CXX11Meta.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h index 7cd04a99e..7e4929ff8 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h @@ -252,6 +252,13 @@ template< typename... Ts > struct reduce; +template< + typename Reducer +> struct reduce +{ + constexpr static inline int run() { return Reducer::Identity; } +}; + template< typename Reducer, typename A, @@ -275,8 +282,14 @@ template< /* generic binary operations */ -struct sum_op { template constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; } }; -struct product_op { template constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; } }; +struct sum_op { + template constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; } + static constexpr int Identity = 0; +}; +struct product_op { + template constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; } + static constexpr int Identity = 1; +}; struct logical_and_op { template constexpr static inline auto run(A a, B b) -> decltype(a && b) { return a && b; } }; struct logical_or_op { template constexpr static inline auto run(A a, B b) -> decltype(a || b) { return a || b; } }; From 3785c692948a9371dea4aa0a6d0530b833c5d4fa Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 17:31:03 -0700 Subject: [PATCH 152/344] Added support for fixed sized tensors of 
rank 0 --- .../Eigen/CXX11/src/Tensor/TensorFixedSize.h | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index 5f3e49e61..bf930f6b8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -90,6 +90,14 @@ class TensorFixedSize : public TensorBase inline Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) @@ -114,6 +122,14 @@ class TensorFixedSize : public TensorBase inline const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const @@ -138,6 +154,13 @@ class TensorFixedSize : public TensorBase Date: Thu, 29 Oct 2015 17:49:04 -0700 Subject: [PATCH 153/344] Added support for tensor maps of rank 0. --- .../Eigen/CXX11/src/Tensor/TensorMap.h | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 55c289810..4347bc2ff 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -52,6 +52,12 @@ template class TensorMap : public Tensor CoordAccess = true, }; + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr), m_dimensions() { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } + #ifdef EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) 
{ @@ -121,11 +127,18 @@ template class TensorMap : public Tensor } } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) + return m_data[0]; + } + #ifdef EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const { - static_assert(sizeof...(otherIndices) + 1 == NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) if (PlainObjectType::Options&RowMajor) { const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, otherIndices...}}); return m_data[index]; @@ -200,6 +213,13 @@ template class TensorMap : public Tensor } } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) + return m_data[0]; + } + #ifdef EIGEN_HAS_VARIADIC_TEMPLATES template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... 
otherIndices) From 31bdafac67268ace9c4eeda4a225379609ce8b99 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 17:56:48 -0700 Subject: [PATCH 154/344] Added a few tests to cover rank-0 tensors --- unsupported/test/cxx11_tensor_fixed_size.cpp | 28 ++++++++++++++++++ unsupported/test/cxx11_tensor_map.cpp | 19 +++++++++++++ unsupported/test/cxx11_tensor_simple.cpp | 30 ++++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp index 5252e4d72..1c33fefb3 100644 --- a/unsupported/test/cxx11_tensor_fixed_size.cpp +++ b/unsupported/test/cxx11_tensor_fixed_size.cpp @@ -15,6 +15,33 @@ using Eigen::Tensor; using Eigen::RowMajor; +static void test_0d() +{ + TensorFixedSize > scalar1; + TensorFixedSize, RowMajor> scalar2; + VERIFY_IS_EQUAL(scalar1.rank(), 0); + + scalar1() = 7.0; + scalar2() = 13.0; + + // Test against shallow copy. + TensorFixedSize > copy = scalar1; + VERIFY_IS_NOT_EQUAL(scalar1.data(), copy.data()); + VERIFY_IS_APPROX(scalar1(), copy()); + copy = scalar1; + VERIFY_IS_NOT_EQUAL(scalar1.data(), copy.data()); + VERIFY_IS_APPROX(scalar1(), copy()); + + TensorFixedSize > scalar3 = scalar1.sqrt(); + TensorFixedSize, RowMajor> scalar4 = scalar2.sqrt(); + VERIFY_IS_EQUAL(scalar3.rank(), 0); + VERIFY_IS_APPROX(scalar3(), sqrtf(7.0)); + VERIFY_IS_APPROX(scalar4(), sqrtf(13.0)); + + scalar3 = scalar1 + scalar2; + VERIFY_IS_APPROX(scalar3(), 7.0f + 13.0f); +} + static void test_1d() { TensorFixedSize > vec1; @@ -223,6 +250,7 @@ static void test_array() void test_cxx11_tensor_fixed_size() { + CALL_SUBTEST(test_0d()); CALL_SUBTEST(test_1d()); CALL_SUBTEST(test_tensor_map()); CALL_SUBTEST(test_2d()); diff --git a/unsupported/test/cxx11_tensor_map.cpp b/unsupported/test/cxx11_tensor_map.cpp index 4c4e10df2..9e79209bb 100644 --- a/unsupported/test/cxx11_tensor_map.cpp +++ b/unsupported/test/cxx11_tensor_map.cpp @@ -14,6 +14,24 @@ using Eigen::Tensor; using 
Eigen::RowMajor; +static void test_0d() +{ + Tensor scalar1; + Tensor scalar2; + + TensorMap> scalar3(scalar1.data()); + TensorMap> scalar4(scalar2.data()); + + scalar1() = 7; + scalar2() = 13; + + VERIFY_IS_EQUAL(scalar1.rank(), 0); + VERIFY_IS_EQUAL(scalar1.size(), 1); + + VERIFY_IS_EQUAL(scalar3(), 7); + VERIFY_IS_EQUAL(scalar4(), 13); +} + static void test_1d() { Tensor vec1(6); @@ -242,6 +260,7 @@ static void test_casting() void test_cxx11_tensor_map() { + CALL_SUBTEST(test_0d()); CALL_SUBTEST(test_1d()); CALL_SUBTEST(test_2d()); CALL_SUBTEST(test_3d()); diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp index 0ce92eed9..47d4d8636 100644 --- a/unsupported/test/cxx11_tensor_simple.cpp +++ b/unsupported/test/cxx11_tensor_simple.cpp @@ -14,6 +14,35 @@ using Eigen::Tensor; using Eigen::RowMajor; +static void test_0d() +{ + Tensor scalar1; + Tensor scalar2; + Tensor scalar3; + Tensor scalar4; + + scalar3.resize(); + scalar4.resize(); + + scalar1() = 7; + scalar2() = 13; + scalar3.setValues(17); + scalar4.setZero(); + + VERIFY_IS_EQUAL(scalar1.rank(), 0); + VERIFY_IS_EQUAL(scalar1.size(), 1); + + VERIFY_IS_EQUAL(scalar1(), 7); + VERIFY_IS_EQUAL(scalar2(), 13); + VERIFY_IS_EQUAL(scalar3(), 17); + VERIFY_IS_EQUAL(scalar4(), 0); + + Tensor scalar5(scalar1); + + VERIFY_IS_EQUAL(scalar5(), 7); + VERIFY_IS_EQUAL(scalar5.data()[0], 7); +} + static void test_1d() { Tensor vec1(6); @@ -287,6 +316,7 @@ static void test_resize() void test_cxx11_tensor_simple() { + CALL_SUBTEST(test_0d()); CALL_SUBTEST(test_1d()); CALL_SUBTEST(test_2d()); CALL_SUBTEST(test_3d()); From 6a02c2a85df05783899ebb8c225cd8677737006c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 29 Oct 2015 20:21:29 -0700 Subject: [PATCH 155/344] Fixed a compilation warning --- unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h 
b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index e8447e84c..fa62b25c2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -163,7 +163,7 @@ class SimpleTensorContractionMapper { template class BaseTensorContractionMapper : public SimpleTensorContractionMapper { @@ -275,14 +275,14 @@ class BaseTensorContractionMapper class TensorContractionInputMapper; template class TensorContractionSubMapper { public: @@ -344,7 +344,7 @@ class TensorContractionSubMapper { template class TensorContractionInputMapper : public BaseTensorContractionMapper { From 213bd0253abfdd96dcfaa54f813e13fb98fb306b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Oct 2015 08:44:37 +0100 Subject: [PATCH 156/344] Fix gcc 4.4 compilation issue --- test/sparse_permutations.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/sparse_permutations.cpp b/test/sparse_permutations.cpp index 8c257a3fd..b82cceff8 100644 --- a/test/sparse_permutations.cpp +++ b/test/sparse_permutations.cpp @@ -76,34 +76,34 @@ template void sparse_permutations(c randomPermutationVector(pi, cols); p.indices() = pi; - VERIFY( is_sorted( eval(mat*p) )); + VERIFY( is_sorted( ::eval(mat*p) )); VERIFY( is_sorted( res = mat*p )); - VERIFY_TRANSPOSITION_COUNT( eval(mat*p), 0); + VERIFY_TRANSPOSITION_COUNT( ::eval(mat*p), 0); //VERIFY_TRANSPOSITION_COUNT( res = mat*p, IsRowMajor ? 
1 : 0 ); res_d = mat_d*p; VERIFY(res.isApprox(res_d) && "mat*p"); - VERIFY( is_sorted( eval(p*mat) )); + VERIFY( is_sorted( ::eval(p*mat) )); VERIFY( is_sorted( res = p*mat )); - VERIFY_TRANSPOSITION_COUNT( eval(p*mat), 0); + VERIFY_TRANSPOSITION_COUNT( ::eval(p*mat), 0); res_d = p*mat_d; VERIFY(res.isApprox(res_d) && "p*mat"); VERIFY( is_sorted( (mat*p).eval() )); VERIFY( is_sorted( res = mat*p.inverse() )); - VERIFY_TRANSPOSITION_COUNT( eval(mat*p.inverse()), 0); + VERIFY_TRANSPOSITION_COUNT( ::eval(mat*p.inverse()), 0); res_d = mat*p.inverse(); VERIFY(res.isApprox(res_d) && "mat*inv(p)"); VERIFY( is_sorted( (p*mat+p*mat).eval() )); VERIFY( is_sorted( res = p.inverse()*mat )); - VERIFY_TRANSPOSITION_COUNT( eval(p.inverse()*mat), 0); + VERIFY_TRANSPOSITION_COUNT( ::eval(p.inverse()*mat), 0); res_d = p.inverse()*mat_d; VERIFY(res.isApprox(res_d) && "inv(p)*mat"); VERIFY( is_sorted( (p * mat * p.inverse()).eval() )); VERIFY( is_sorted( res = mat.twistedBy(p) )); - VERIFY_TRANSPOSITION_COUNT( eval(p * mat * p.inverse()), 0); + VERIFY_TRANSPOSITION_COUNT( ::eval(p * mat * p.inverse()), 0); res_d = (p * mat_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "p*mat*inv(p)"); From 27c56bf60feeec622b3bf7610f5f969fb7441753 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Oct 2015 10:57:11 +0100 Subject: [PATCH 157/344] Workaround compilation issue with MSVC<=2013 --- Eigen/src/SparseCore/SparseAssign.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index cb154d1c2..923034382 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -132,9 +132,9 @@ struct Assignment } }; -// Sparse to Dense assignment -template< typename DstXprType, typename SrcXprType, typename Functor> -struct Assignment +// Generic Sparse to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct 
Assignment { typedef typename DstXprType::Scalar Scalar; static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) From 0e6cb08f929cfd2cade52f917eb8c06df3b87606 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Oct 2015 11:44:22 +0100 Subject: [PATCH 158/344] Fix shadow warning --- Eigen/src/SparseCore/SparseAssign.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index 923034382..c5589492e 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -136,7 +136,6 @@ struct Assignment template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment { - typedef typename DstXprType::Scalar Scalar; static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); From c8c8821038e5216e84eccb09e299d929aa0750d1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Oct 2015 12:00:34 +0100 Subject: [PATCH 159/344] Biug 1100: remove explicit CMAKE_INSTALL_PREFIX prefix to please cmake install's DESTINATION argument --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e037af3bc..401400a21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -354,7 +354,7 @@ if(EIGEN_INCLUDE_INSTALL_DIR) ) else() set(INCLUDE_INSTALL_DIR - "${CMAKE_INSTALL_PREFIX}/include/eigen3" + "include/eigen3" CACHE INTERNAL "The directory where we install the header files (internal)" ) @@ -457,7 +457,7 @@ if(cmake_generator_tolower MATCHES "makefile") message(STATUS "make install | Install to ${CMAKE_INSTALL_PREFIX}. To change that:") message(STATUS " | cmake . 
-DCMAKE_INSTALL_PREFIX=yourpath") message(STATUS " | Eigen headers will then be installed to:") - message(STATUS " | ${INCLUDE_INSTALL_DIR}") + message(STATUS " | ${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}") message(STATUS " | To install Eigen headers to a separate location, do:") message(STATUS " | cmake . -DEIGEN_INCLUDE_INSTALL_DIR=yourpath") message(STATUS "make doc | Generate the API documentation, requires Doxygen & LaTeX") From ddaaa2d38184680e0e2cf8e26eaf4c1b0b8fa8fc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Oct 2015 12:02:52 +0100 Subject: [PATCH 160/344] bug #1101: typo --- Eigen/SparseCore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/SparseCore b/Eigen/SparseCore index c2a25f9b5..76966c4c4 100644 --- a/Eigen/SparseCore +++ b/Eigen/SparseCore @@ -21,7 +21,7 @@ /** * \defgroup SparseCore_Module SparseCore module * - * This module provides a sparse matrix representation, and basic associatd matrix manipulations + * This module provides a sparse matrix representation, and basic associated matrix manipulations * and operations. 
* * See the \ref TutorialSparse "Sparse tutorial" From 9285647dfe91c2fe3db112e71b219ee16db81e87 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Oct 2015 14:44:22 +0100 Subject: [PATCH 161/344] Limit matrix size when testing for NaN: they can become prohibitively expensive when running on x87 fp unit --- test/eigensolver_selfadjoint.cpp | 2 +- test/schur_complex.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp index a7b4deb5d..f909761a1 100644 --- a/test/eigensolver_selfadjoint.cpp +++ b/test/eigensolver_selfadjoint.cpp @@ -149,7 +149,7 @@ template void selfadjointeigensolver(const MatrixType& m) VERIFY_IS_APPROX(tridiag.matrixT(), eiSymmTridiag.eigenvectors().real() * eiSymmTridiag.eigenvalues().asDiagonal() * eiSymmTridiag.eigenvectors().real().transpose()); } - if (rows > 1) + if (rows > 1 && rows < 20) { // Test matrix with NaN symmC(0,0) = std::numeric_limits::quiet_NaN(); diff --git a/test/schur_complex.cpp b/test/schur_complex.cpp index 5e869790f..deb78e44e 100644 --- a/test/schur_complex.cpp +++ b/test/schur_complex.cpp @@ -25,7 +25,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim ComplexMatrixType T = schurOfA.matrixT(); for(int row = 1; row < size; ++row) { for(int col = 0; col < row; ++col) { - VERIFY(T(row,col) == (typename MatrixType::Scalar)0); + VERIFY(T(row,col) == (typename MatrixType::Scalar)0); } } VERIFY_IS_APPROX(A.template cast(), U * T * U.adjoint()); @@ -70,7 +70,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim VERIFY_IS_EQUAL(cs1.matrixT(), csOnlyT.matrixT()); VERIFY_RAISES_ASSERT(csOnlyT.matrixU()); - if (size > 1) + if (size > 1 && size < 20) { // Test matrix with NaN A(0,0) = std::numeric_limits::quiet_NaN(); From fdf3030ff86fc7d194e6c67acafca80d3148991c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Oct 2015 15:07:50 +0100 Subject: [PATCH 162/344] Limit matrix sizes for trmm unit test 
and complexes. --- test/product_trmm.cpp | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/test/product_trmm.cpp b/test/product_trmm.cpp index d715b9a36..12e554410 100644 --- a/test/product_trmm.cpp +++ b/test/product_trmm.cpp @@ -9,10 +9,18 @@ #include "main.h" +template +int get_random_size() +{ + const int factor = NumTraits::ReadCost; + const int max_test_size = EIGEN_TEST_MAX_SIZE>2*factor ? EIGEN_TEST_MAX_SIZE/factor : EIGEN_TEST_MAX_SIZE; + return internal::random(1,max_test_size); +} + template -void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), - int cols=internal::random(1,EIGEN_TEST_MAX_SIZE), - int otherCols = OtherCols==Dynamic?internal::random(1,EIGEN_TEST_MAX_SIZE):OtherCols) +void trmm(int rows=get_random_size(), + int cols=get_random_size(), + int otherCols = OtherCols==Dynamic?get_random_size():OtherCols) { typedef Matrix TriMatrix; typedef Matrix OnTheRight; @@ -42,13 +50,13 @@ void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), VERIFY_IS_APPROX( ge_xs.noalias() = mat.template triangularView() * ge_right, tri * ge_right); VERIFY_IS_APPROX( ge_sx.noalias() = ge_left * mat.template triangularView(), ge_left * tri); - + VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView() * (s2*ge_left.transpose()), s1*triTr.conjugate() * (s2*ge_left.transpose())); VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.transpose() * mat.adjoint().template triangularView(), ge_right.transpose() * triTr.conjugate()); VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView() * (s2*ge_left.adjoint()), s1*triTr.conjugate() * (s2*ge_left.adjoint())); VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.adjoint() * mat.adjoint().template triangularView(), ge_right.adjoint() * triTr.conjugate()); - + ge_xs_save = ge_xs; VERIFY_IS_APPROX( (ge_xs_save + s1*triTr.conjugate() * (s2*ge_left.adjoint())).eval(), ge_xs.noalias() += (s1*mat.adjoint()).template triangularView() * 
(s2*ge_left.adjoint()) ); ge_sx.setRandom(); @@ -61,13 +69,13 @@ void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), } template -void trmv(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), int cols=internal::random(1,EIGEN_TEST_MAX_SIZE)) +void trmv(int rows=get_random_size(), int cols=get_random_size()) { trmm(rows,cols,1); } template -void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), int cols=internal::random(1,EIGEN_TEST_MAX_SIZE), int otherCols = internal::random(1,EIGEN_TEST_MAX_SIZE)) +void trmm(int rows=get_random_size(), int cols=get_random_size(), int otherCols = get_random_size()) { trmm(rows,cols,otherCols); } From 8a3151de2eae8299629417c98c1040b7ced5bdb5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Oct 2015 18:06:03 +0100 Subject: [PATCH 163/344] Limit matrix size for other eigen and schur decompositions --- test/eigensolver_complex.cpp | 2 +- test/eigensolver_generic.cpp | 2 +- test/schur_real.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/eigensolver_complex.cpp b/test/eigensolver_complex.cpp index ad982ed40..8e2bb9ef0 100644 --- a/test/eigensolver_complex.cpp +++ b/test/eigensolver_complex.cpp @@ -118,7 +118,7 @@ template void eigensolver(const MatrixType& m) MatrixType id = MatrixType::Identity(rows, cols); VERIFY_IS_APPROX(id.operatorNorm(), RealScalar(1)); - if (rows > 1) + if (rows > 1 && rows < 20) { // Test matrix with NaN a(0,0) = std::numeric_limits::quiet_NaN(); diff --git a/test/eigensolver_generic.cpp b/test/eigensolver_generic.cpp index c42fcaeba..566546310 100644 --- a/test/eigensolver_generic.cpp +++ b/test/eigensolver_generic.cpp @@ -63,7 +63,7 @@ template void eigensolver(const MatrixType& m) MatrixType id = MatrixType::Identity(rows, cols); VERIFY_IS_APPROX(id.operatorNorm(), RealScalar(1)); - if (rows > 2) + if (rows > 2 && rows < 20) { // Test matrix with NaN a(0,0) = std::numeric_limits::quiet_NaN(); diff --git a/test/schur_real.cpp b/test/schur_real.cpp index 
36b9c24d1..e67db6433 100644 --- a/test/schur_real.cpp +++ b/test/schur_real.cpp @@ -91,7 +91,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim VERIFY_IS_EQUAL(rs1.matrixT(), rsOnlyT.matrixT()); VERIFY_RAISES_ASSERT(rsOnlyT.matrixU()); - if (size > 2) + if (size > 2 && rows < 20) { // Test matrix with NaN A(0,0) = std::numeric_limits::quiet_NaN(); From 5a2007f7e4dcf72e40c3e45f2f86bcc9bd7c0fae Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Oct 2015 22:16:23 +0100 Subject: [PATCH 164/344] typo --- test/schur_real.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/schur_real.cpp b/test/schur_real.cpp index e67db6433..cfe4570d4 100644 --- a/test/schur_real.cpp +++ b/test/schur_real.cpp @@ -91,7 +91,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim VERIFY_IS_EQUAL(rs1.matrixT(), rsOnlyT.matrixT()); VERIFY_RAISES_ASSERT(rsOnlyT.matrixU()); - if (size > 2 && rows < 20) + if (size > 2 && size < 20) { // Test matrix with NaN A(0,0) = std::numeric_limits::quiet_NaN(); From b32948c6427635aa91cd585a2eceecef8440fbd1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Oct 2015 22:25:59 +0100 Subject: [PATCH 165/344] bug #1102: fix multiple definition linking issue --- Eigen/src/Core/MathFunctions.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 1820fc1c8..4d4611c6b 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -871,20 +871,22 @@ template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) } //MSVC defines a _isnan builtin function, but for double only -template<> EIGEN_DEVICE_FUNC bool (isnan)(const long double& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const double& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC bool (isnan)(const float& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC inline bool (isnan)(const long double& 
x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC inline bool (isnan)(const double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC inline bool (isnan)(const float& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC bool (isinf)(const long double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC bool (isinf)(const double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC bool (isinf)(const float& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC inline bool (isinf)(const long double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC inline bool (isinf)(const double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC inline bool (isinf./)(const float& x) { return isinf_msvc_helper(x); } #elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) #if EIGEN_GNUC_AT_LEAST(5,0) - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((optimize("no-finite-math-only"))) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) #else - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC __attribute__((noinline,optimize("no-finite-math-only"))) + // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), + // while the second prevent too aggressive optimizations in fast-math mode + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only"))) #endif template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return __builtin_isnan(x); } From c0352197a1fc8ceb2de5385c7f6d259eabe7a9c9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 31 Oct 2015 18:06:28 +0100 Subject: [PATCH 166/344] bug #1099: add missing incude for CUDA --- Eigen/src/Core/util/Meta.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h 
index ef35cefb4..a7e7555e9 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -13,6 +13,7 @@ #if defined(__CUDA_ARCH__) #include +#include #endif namespace Eigen { From d46e2c10a60759e2b5a4b8e711832780215d768a Mon Sep 17 00:00:00 2001 From: Alexandre Avenel Date: Sun, 1 Nov 2015 10:49:27 +0100 Subject: [PATCH 167/344] Add round, ceil and floor for SSE4.1/AVX (Bug #70) --- Eigen/src/Core/arch/AVX/PacketMath.h | 19 +++++++++++++++++-- Eigen/src/Core/arch/SSE/PacketMath.h | 27 +++++++++++++++++++++++++-- test/packetmath.cpp | 4 ++++ 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index dfdb71abd..b313fb09a 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -66,7 +66,10 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1 + HasBlend = 1, + HasRound = 1, + HasFloor = 1, + HasCeil = 1 }; }; template<> struct packet_traits : default_packet_traits @@ -83,7 +86,10 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1 + HasBlend = 1, + HasRound = 1, + HasFloor = 1, + HasCeil = 1 }; }; @@ -176,6 +182,15 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin(const Packet4d& a, const template<> EIGEN_STRONG_INLINE Packet8f pmax(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d pmax(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pround(const Packet8f& a) { return _mm256_round_ps(a, 0); } 
+template<> EIGEN_STRONG_INLINE Packet2d pround(const Packet4d& a) { return _mm256_round_pd(a, 0); } + +template<> EIGEN_STRONG_INLINE Packet4f pceil(const Packet8f& a) { return _mm256_ceil_ps(a); } +template<> EIGEN_STRONG_INLINE Packet2d pceil(const Packet4d& a) { return _mm256_ceil_pd(a); } + +template<> EIGEN_STRONG_INLINE Packet4f pfloor(const Packet8f& a) { return _mm256_floor_ps(a); } +template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet4d& a) { return _mm256_floor_pd(a); } + template<> EIGEN_STRONG_INLINE Packet8f pand(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d pand(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 3c30b2cf0..3fcb1c138 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -109,7 +109,13 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1 + HasBlend = 1, + +#ifdef EIGEN_VECTORIZE_SSE4_1 + HasRound = 1, + HasFloor = 1, + HasCeil = 1 +#endif }; }; template<> struct packet_traits : default_packet_traits @@ -126,7 +132,13 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1 + HasBlend = 1, + +#ifdef EIGEN_VECTORIZE_SSE4_1 + HasRound = 1, + HasFloor = 1, + HasCeil = 1 +#endif }; }; #endif @@ -256,6 +268,17 @@ template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const #endif } +#ifdef EIGEN_VECTORIZE_SSE4_1 +template<> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { return _mm_round_ps(a, 0); } +template<> EIGEN_STRONG_INLINE Packet2d pround(const Packet2d& a) { return _mm_round_pd(a, 0); } + +template<> EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { return _mm_ceil_ps(a); } +template<> EIGEN_STRONG_INLINE Packet2d pceil(const Packet2d& a) { return _mm_ceil_pd(a); } + 
+template<> EIGEN_STRONG_INLINE Packet4f pfloor(const Packet4f& a) { return _mm_floor_ps(a); } +template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { return _mm_floor_pd(a); } +#endif + template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pand(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); } diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 568058f1a..dea648002 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -296,6 +296,10 @@ template void packetmath_real() EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4]; EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4]; + CHECK_CWISE1_IF(PacketTraits::HasRound, std::round, internal::pround); + CHECK_CWISE1_IF(PacketTraits::HasCeil, std::ceil, internal::pceil); + CHECK_CWISE1_IF(PacketTraits::HasFloor, std::floor, internal::pfloor); + for (int i=0; i(-1,1) * std::pow(Scalar(10), internal::random(-3,3)); From 29a94c80554920e016ef960853cd16c19702994a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 2 Nov 2015 16:11:59 +0100 Subject: [PATCH 168/344] compilation issue --- Eigen/src/Core/MathFunctions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 4d4611c6b..2ac6f4c67 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -877,7 +877,7 @@ template<> EIGEN_DEVICE_FUNC inline bool (isnan)(const float& x) { return template<> EIGEN_DEVICE_FUNC inline bool (isinf)(const long double& x) { return isinf_msvc_helper(x); } template<> EIGEN_DEVICE_FUNC inline bool (isinf)(const double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC inline bool (isinf./)(const float& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC 
inline bool (isinf)(const float& x) { return isinf_msvc_helper(x); } #elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) @@ -885,7 +885,7 @@ template<> EIGEN_DEVICE_FUNC inline bool (isinf./)(const float& x) { retur #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) #else // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), - // while the second prevent too aggressive optimizations in fast-math mode + // while the second prevent too aggressive optimizations in fast-math mode: #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only"))) #endif From 36cd6daaae4e13711853b7c6ecab8a7cf3a84217 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 3 Nov 2015 16:36:30 -0800 Subject: [PATCH 169/344] Made the CUDA implementation of ploadt_ro compatible with cuda implementations older than 3.5 --- Eigen/src/Core/arch/CUDA/PacketMath.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index a2d803c06..0d2c2fef0 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -177,7 +177,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(double* to to[1] = from.y; } -#ifdef __CUDA_ARCH__ +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro(const float* from) { return __ldg((const float4*)from); From f6b1deebab06dda60ffea454ead6b581a138b513 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 4 Nov 2015 17:02:32 +0100 Subject: [PATCH 170/344] Fix compilation of sparse-triangular to dense assignment --- Eigen/src/SparseCore/SparseAssign.h | 1 + test/sparse_basic.cpp | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/Eigen/src/SparseCore/SparseAssign.h 
b/Eigen/src/SparseCore/SparseAssign.h index c5589492e..4a8dd12e4 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -64,6 +64,7 @@ struct Sparse2Dense {}; template<> struct AssignmentKind { typedef Sparse2Sparse Kind; }; template<> struct AssignmentKind { typedef Sparse2Sparse Kind; }; template<> struct AssignmentKind { typedef Sparse2Dense Kind; }; +template<> struct AssignmentKind { typedef Sparse2Dense Kind; }; template diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index d8e42e984..0c7d2c652 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -338,6 +338,10 @@ template void sparse_basic(const SparseMatrixType& re refMat3 = refMat2.template triangularView(); m3 = m2.template triangularView(); VERIFY_IS_APPROX(m3, refMat3); + + // check sparse-traingular to dense + refMat3 = m2.template triangularView(); + VERIFY_IS_APPROX(refMat3, DenseMatrix(refMat2.template triangularView())); } // test selfadjointView From 902750826b52de97f2cd48d91fcf4f70d874e93c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 4 Nov 2015 17:42:07 +0100 Subject: [PATCH 171/344] Add support for dense.cwiseProduct(sparse) This also fixes a regression regarding (dense*sparse).diagonal() --- Eigen/src/Core/MatrixBase.h | 9 +++++++++ Eigen/src/Core/util/ForwardDeclarations.h | 4 +++- Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 4 ++-- Eigen/src/SparseCore/SparseMatrixBase.h | 22 ++++++++++------------ Eigen/src/SparseCore/SparseUtil.h | 1 - test/sparse_basic.cpp | 2 ++ 6 files changed, 26 insertions(+), 16 deletions(-) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index b5afff005..9d612c852 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -438,6 +438,15 @@ template class MatrixBase template void applyOnTheRight(Index p, Index q, const JacobiRotation& j); +///////// SparseCore module ///////// + + template + EIGEN_STRONG_INLINE const typename SparseMatrixBase::template 
CwiseProductDenseReturnType::Type + cwiseProduct(const SparseMatrixBase &other) const + { + return other.cwiseProduct(derived()); + } + ///////// MatrixFunctions module ///////// typedef typename internal::stem_function::type StemFunction; diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index c2d5f4316..34697765d 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -265,7 +265,6 @@ template class Rotation2D; template class AngleAxis; template class Translation; template class AlignedBox; - template class Quaternion; template class Transform; template class ParametrizedLine; @@ -273,6 +272,9 @@ template class Hyperp template class UniformScaling; template class Homogeneous; +// Sparse module: +template class SparseMatrixBase; + // MatrixFunctions module template struct MatrixExponentialReturnValue; template class MatrixFunctionReturnValue; diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index abbbf397b..90f702ee3 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -423,10 +423,10 @@ Derived& SparseMatrixBase::operator-=(const DiagonalBase& template template -EIGEN_STRONG_INLINE const EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE +EIGEN_STRONG_INLINE const typename SparseMatrixBase::template CwiseProductDenseReturnType::Type SparseMatrixBase::cwiseProduct(const MatrixBase &other) const { - return EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE(derived(), other.derived()); + return typename CwiseProductDenseReturnType::Type(derived(), other.derived()); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index ff417302f..648ae1f8a 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -262,20 +262,18 @@ template class SparseMatrixBase Derived& operator*=(const 
Scalar& other); Derived& operator/=(const Scalar& other); - #define EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE \ - CwiseBinaryOp< \ - internal::scalar_product_op< \ - typename internal::scalar_product_traits< \ - typename internal::traits::Scalar, \ - typename internal::traits::Scalar \ - >::ReturnType \ - >, \ - const Derived, \ - const OtherDerived \ - > + template struct CwiseProductDenseReturnType { + typedef CwiseBinaryOp::Scalar, + typename internal::traits::Scalar + >::ReturnType>, + const Derived, + const OtherDerived + > Type; + }; template - EIGEN_STRONG_INLINE const EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE + EIGEN_STRONG_INLINE const typename CwiseProductDenseReturnType::Type cwiseProduct(const MatrixBase &other) const; // sparse * diagonal diff --git a/Eigen/src/SparseCore/SparseUtil.h b/Eigen/src/SparseCore/SparseUtil.h index 62f0f6864..74df0d496 100644 --- a/Eigen/src/SparseCore/SparseUtil.h +++ b/Eigen/src/SparseCore/SparseUtil.h @@ -49,7 +49,6 @@ const int InnerRandomAccessPattern = 0x2 | CoherentAccessPattern; const int OuterRandomAccessPattern = 0x4 | CoherentAccessPattern; const int RandomAccessPattern = 0x8 | OuterRandomAccessPattern | InnerRandomAccessPattern; -template class SparseMatrixBase; template class SparseMatrix; template class DynamicSparseMatrix; template class SparseVector; diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 0c7d2c652..2d0f5819f 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -188,6 +188,8 @@ template void sparse_basic(const SparseMatrixType& re refM4.setRandom(); // sparse cwise* dense VERIFY_IS_APPROX(m3.cwiseProduct(refM4), refM3.cwiseProduct(refM4)); + // dense cwise* sparse + VERIFY_IS_APPROX(refM4.cwiseProduct(m3), refM4.cwiseProduct(refM3)); // VERIFY_IS_APPROX(m3.cwise()/refM4, refM3.cwise()/refM4); // test aliasing From 90323f1751ca170490b5a7590410ef5d05d62ac5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 4 Nov 2015 22:15:57 +0100 Subject: [PATCH 172/344] Fix AVX 
round/ceil/floor, and fix respective unit test --- Eigen/src/Core/arch/AVX/PacketMath.h | 12 ++++++------ test/packetmath.cpp | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index b313fb09a..717ae67c5 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -182,14 +182,14 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin(const Packet4d& a, const template<> EIGEN_STRONG_INLINE Packet8f pmax(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d pmax(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f pround(const Packet8f& a) { return _mm256_round_ps(a, 0); } -template<> EIGEN_STRONG_INLINE Packet2d pround(const Packet4d& a) { return _mm256_round_pd(a, 0); } +template<> EIGEN_STRONG_INLINE Packet8f pround(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); } +template<> EIGEN_STRONG_INLINE Packet4d pround(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); } -template<> EIGEN_STRONG_INLINE Packet4f pceil(const Packet8f& a) { return _mm256_ceil_ps(a); } -template<> EIGEN_STRONG_INLINE Packet2d pceil(const Packet4d& a) { return _mm256_ceil_pd(a); } +template<> EIGEN_STRONG_INLINE Packet8f pceil(const Packet8f& a) { return _mm256_ceil_ps(a); } +template<> EIGEN_STRONG_INLINE Packet4d pceil(const Packet4d& a) { return _mm256_ceil_pd(a); } -template<> EIGEN_STRONG_INLINE Packet4f pfloor(const Packet8f& a) { return _mm256_floor_ps(a); } -template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet4d& a) { return _mm256_floor_pd(a); } +template<> EIGEN_STRONG_INLINE Packet8f pfloor(const Packet8f& a) { return _mm256_floor_ps(a); } +template<> EIGEN_STRONG_INLINE Packet4d pfloor(const Packet4d& a) { return _mm256_floor_pd(a); } template<> EIGEN_STRONG_INLINE Packet8f 
pand(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d pand(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); } diff --git a/test/packetmath.cpp b/test/packetmath.cpp index dea648002..b6616ac5e 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -29,7 +29,7 @@ template bool areApproxAbs(const Scalar* a, const Scalar* b, in { if (!isApproxAbs(a[i],b[i],refvalue)) { - std::cout << "[" << Map >(a,size) << "]" << " != " << Map >(b,size) << "\n"; + std::cout << "ref: [" << Map >(a,size) << "]" << " != vec: [" << Map >(b,size) << "]\n"; return false; } } @@ -42,7 +42,7 @@ template bool areApprox(const Scalar* a, const Scalar* b, int s { if (a[i]!=b[i] && !internal::isApprox(a[i],b[i])) { - std::cout << "[" << Map >(a,size) << "]" << " != " << Map >(b,size) << "\n"; + std::cout << "ref: [" << Map >(a,size) << "]" << " != vec: [" << Map >(b,size) << "]\n"; return false; } } @@ -296,10 +296,6 @@ template void packetmath_real() EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4]; EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4]; - CHECK_CWISE1_IF(PacketTraits::HasRound, std::round, internal::pround); - CHECK_CWISE1_IF(PacketTraits::HasCeil, std::ceil, internal::pceil); - CHECK_CWISE1_IF(PacketTraits::HasFloor, std::floor, internal::pfloor); - for (int i=0; i(-1,1) * std::pow(Scalar(10), internal::random(-3,3)); @@ -308,6 +304,10 @@ template void packetmath_real() CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin); CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos); CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan); + + CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround); + CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil); + CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor); for (int i=0; i Date: Wed, 4 Nov 2015 13:57:36 -0800 Subject: [PATCH 173/344] Updated the reduction code so that 
full reductions now return a tensor of rank 0. --- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 114 ++++++++++-------- 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 1d22843af..1d534f8ae 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -64,10 +64,10 @@ template struct DimInitializer { } }; -template <> struct DimInitializer > { +template <> struct DimInitializer > { template EIGEN_DEVICE_FUNC static void run(const InputDims& input_dims, const array&, - Sizes<1>*, array* reduced_dims) { + Sizes<0>*, array* reduced_dims) { const int NumInputDims = internal::array_size::value; for (int i = 0; i < NumInputDims; ++i) { (*reduced_dims)[i] = input_dims[i]; @@ -136,6 +136,12 @@ struct GenericDimReducer<0, Self, Op> { } } }; +template +struct GenericDimReducer<-1, Self, Op> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::CoeffReturnType*) { + eigen_assert(false && "should never be called"); + } +}; template struct InnerMostDimReducer { @@ -192,6 +198,12 @@ struct InnerMostDimPreserver<0, Self, Op, true> { } } }; +template +struct InnerMostDimPreserver<-1, Self, Op, true> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { + eigen_assert(false && "should never be called"); + } +}; // Default full reducer template @@ -550,8 +562,8 @@ struct TensorEvaluator, Device> typedef typename TensorEvaluator::Dimensions InputDimensions; static const int NumInputDims = internal::array_size::value; static const int NumReducedDims = internal::array_size::value; - static const int NumOutputDims = (NumInputDims==NumReducedDims) ? 
1 : NumInputDims - NumReducedDims; - typedef typename internal::conditional, DSizes >::type Dimensions; + static const int NumOutputDims = NumInputDims - NumReducedDims; + typedef typename internal::conditional, DSizes >::type Dimensions; typedef typename XprType::Scalar Scalar; typedef TensorEvaluator, Device> Self; static const bool InputPacketAccess = TensorEvaluator::PacketAccess; @@ -565,7 +577,7 @@ struct TensorEvaluator, Device> static const bool ReducingInnerMostDims = internal::are_inner_most_dims::value; static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims::value; - static const bool RunningFullReduction = (NumInputDims==NumReducedDims); + static const bool RunningFullReduction = (NumOutputDims==0); EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device) @@ -589,51 +601,54 @@ struct TensorEvaluator, Device> internal::DimInitializer::run(input_dims, reduced, &m_dimensions, &m_reducedDims); // Precompute output strides. - if (static_cast(Layout) == static_cast(ColMajor)) { - m_outputStrides[0] = 1; - for (int i = 1; i < NumOutputDims; ++i) { - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - } - } else { - m_outputStrides[NumOutputDims - 1] = 1; - for (int i = NumOutputDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - } - } - - // Precompute input strides. 
- array input_strides; - if (static_cast(Layout) == static_cast(ColMajor)) { - input_strides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - input_strides[i] = input_strides[i-1] * input_dims[i-1]; - } - } else { - input_strides[NumInputDims - 1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; - } - } - - int outputIndex = 0; - int reduceIndex = 0; - for (int i = 0; i < NumInputDims; ++i) { - if (reduced[i]) { - m_reducedStrides[reduceIndex] = input_strides[i]; - ++reduceIndex; + if (NumOutputDims > 0) { + if (static_cast(Layout) == static_cast(ColMajor)) { + m_outputStrides[0] = 1; + for (int i = 1; i < NumOutputDims; ++i) { + m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; + } } else { - m_preservedStrides[outputIndex] = input_strides[i]; - ++outputIndex; + m_outputStrides[NumOutputDims - 1] = 1; + for (int i = NumOutputDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; + } + } + } + + // Precompute input strides. 
+ if (NumInputDims > 0) { + array input_strides; + if (static_cast(Layout) == static_cast(ColMajor)) { + input_strides[0] = 1; + for (int i = 1; i < NumInputDims; ++i) { + input_strides[i] = input_strides[i-1] * input_dims[i-1]; + } + } else { + input_strides[NumInputDims - 1] = 1; + for (int i = NumInputDims - 2; i >= 0; --i) { + input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; + } + } + + int outputIndex = 0; + int reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (reduced[i]) { + m_reducedStrides[reduceIndex] = input_strides[i]; + ++reduceIndex; + } else { + m_preservedStrides[outputIndex] = input_strides[i]; + ++outputIndex; + } } } // Special case for full reductions - if (NumInputDims == NumReducedDims) { - eigen_assert(m_dimensions[0] == 1); + if (NumOutputDims == 0) { m_preservedStrides[0] = internal::array_prod(input_dims); } } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } typedef typename internal::remove_const::type CoeffReturnType; @@ -674,9 +689,9 @@ struct TensorEvaluator, Device> return *m_result; } Op reducer(m_reducer); - if (ReducingInnerMostDims) { + if (ReducingInnerMostDims || RunningFullReduction) { const Index num_values_to_reduce = - (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumOutputDims - 1]; + (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; return internal::InnerMostDimReducer::reduce(*this, firstInput(index), num_values_to_reduce, reducer); } else { @@ -697,7 +712,7 @@ struct TensorEvaluator, Device> EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; if (ReducingInnerMostDims) { const Index num_values_to_reduce = - (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumOutputDims - 1]; + (static_cast(Layout) == static_cast(ColMajor)) ? 
m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; const Index firstIndex = firstInput(index); for (Index i = 0; i < packetSize; ++i) { Op reducer(m_reducer); @@ -748,7 +763,7 @@ struct TensorEvaluator, Device> if (static_cast(Layout) == static_cast(ColMajor)) { return index * m_preservedStrides[0]; } else { - return index * m_preservedStrides[NumOutputDims - 1]; + return index * m_preservedStrides[NumPreservedStrides - 1]; } } // TBD: optimize the case where we preserve the innermost dimensions. @@ -774,10 +789,10 @@ struct TensorEvaluator, Device> index -= idx * m_outputStrides[i]; } if (PreservingInnerMostDims) { - eigen_assert(m_preservedStrides[NumOutputDims - 1] == 1); + eigen_assert(m_preservedStrides[NumPreservedStrides - 1] == 1); startInput += index; } else { - startInput += index * m_preservedStrides[NumOutputDims - 1]; + startInput += index * m_preservedStrides[NumPreservedStrides - 1]; } } return startInput; @@ -789,7 +804,8 @@ struct TensorEvaluator, Device> array m_outputStrides; // Subset of strides of the input tensor for the non-reduced dimensions. // Indexed by output dimensions. - array m_preservedStrides; + static const int NumPreservedStrides = max_n_1::size; + array m_preservedStrides; // Subset of strides of the input tensor for the reduced dimensions. // Indexed by reduced dimensions. 
From 780eeb3be779ea3fd6065f283278793ac7557287 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 5 Nov 2015 00:32:48 -0800 Subject: [PATCH 174/344] prevent stack overflow in unit test --- test/product_small.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/product_small.cpp b/test/product_small.cpp index c561ec63b..2a2c1e35f 100644 --- a/test/product_small.cpp +++ b/test/product_small.cpp @@ -58,7 +58,7 @@ void test_product_small() } { - Eigen::Matrix A, B, C; + Eigen::Matrix A, B, C; A.setRandom(); C = A; for(int k=0; k<79; ++k) From 2844e7ae43553731c433ec1d331b3accc2027b48 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 5 Nov 2015 12:05:02 +0100 Subject: [PATCH 175/344] SPQR and UmfPack need to link to cholmod. (grafted from 47592d31eabfb09b811005a725ea5ca2eb5a6fcf ) --- cmake/FindSPQR.cmake | 7 ++++++- cmake/FindUmfpack.cmake | 21 +++++++++++++-------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/cmake/FindSPQR.cmake b/cmake/FindSPQR.cmake index 794c212af..1e958c3c1 100644 --- a/cmake/FindSPQR.cmake +++ b/cmake/FindSPQR.cmake @@ -26,7 +26,12 @@ if(SPQR_LIBRARIES) find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS $ENV{SPQRDIR} ${LIB_INSTALL_DIR}) if (SUITESPARSE_LIBRARY) set(SPQR_LIBRARIES ${SPQR_LIBRARIES} ${SUITESPARSE_LIBRARY}) - endif (SUITESPARSE_LIBRARY) + endif() + + find_library(CHOLMOD_LIBRARY cholmod PATHS $ENV{UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) + if(CHOLMOD_LIBRARY) + set(SPQR_LIBRARIES ${SPQR_LIBRARIES} ${CHOLMOD_LIBRARY}) + endif() endif(SPQR_LIBRARIES) diff --git a/cmake/FindUmfpack.cmake b/cmake/FindUmfpack.cmake index 16b046cd6..53cf0b49b 100644 --- a/cmake/FindUmfpack.cmake +++ b/cmake/FindUmfpack.cmake @@ -20,24 +20,29 @@ find_library(UMFPACK_LIBRARIES umfpack PATHS $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR} if(UMFPACK_LIBRARIES) - if (NOT UMFPACK_LIBDIR) + if(NOT UMFPACK_LIBDIR) get_filename_component(UMFPACK_LIBDIR ${UMFPACK_LIBRARIES} PATH) endif(NOT UMFPACK_LIBDIR) 
find_library(COLAMD_LIBRARY colamd PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) - if (COLAMD_LIBRARY) + if(COLAMD_LIBRARY) set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${COLAMD_LIBRARY}) - endif (COLAMD_LIBRARY) + endif () find_library(AMD_LIBRARY amd PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) - if (AMD_LIBRARY) + if(AMD_LIBRARY) set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${AMD_LIBRARY}) - endif (AMD_LIBRARY) + endif () find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS ${UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) - if (SUITESPARSE_LIBRARY) + if(SUITESPARSE_LIBRARY) set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${SUITESPARSE_LIBRARY}) - endif (SUITESPARSE_LIBRARY) + endif () + + find_library(CHOLMOD_LIBRARY cholmod PATHS $ENV{UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) + if(CHOLMOD_LIBRARY) + set(UMFPACK_LIBRARIES ${UMFPACK_LIBRARIES} ${CHOLMOD_LIBRARY}) + endif() endif(UMFPACK_LIBRARIES) @@ -45,4 +50,4 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(UMFPACK DEFAULT_MSG UMFPACK_INCLUDES UMFPACK_LIBRARIES) -mark_as_advanced(UMFPACK_INCLUDES UMFPACK_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY SUITESPARSE_LIBRARY) +mark_as_advanced(UMFPACK_INCLUDES UMFPACK_LIBRARIES AMD_LIBRARY COLAMD_LIBRARY CHOLMOD_LIBRARY SUITESPARSE_LIBRARY) From ae87f094eb86ec56f110ff1f317aa57f765d33e7 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 5 Nov 2015 12:08:36 +0100 Subject: [PATCH 176/344] Fix "," in non SSE4 mode --- Eigen/src/Core/arch/SSE/PacketMath.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 3fcb1c138..eb517b871 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -109,9 +109,10 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1, + HasBlend = 1 #ifdef EIGEN_VECTORIZE_SSE4_1 + , HasRound = 1, 
HasFloor = 1, HasCeil = 1 @@ -132,9 +133,10 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, - HasBlend = 1, + HasBlend = 1 #ifdef EIGEN_VECTORIZE_SSE4_1 + , HasRound = 1, HasFloor = 1, HasCeil = 1 From 9ceaa8e445524e9f63720fa5015fa06f6dbdcb0e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 5 Nov 2015 13:54:26 +0100 Subject: [PATCH 177/344] bug #1063: nest AutoDiffScalar by value to avoid dead references --- unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index bc641aef4..8b58b512b 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -629,7 +629,7 @@ template struct NumTraits > typedef AutoDiffScalar::Real,DerType::RowsAtCompileTime,DerType::ColsAtCompileTime, DerType::Options, DerType::MaxRowsAtCompileTime, DerType::MaxColsAtCompileTime> > Real; typedef AutoDiffScalar NonInteger; - typedef AutoDiffScalar& Nested; + typedef AutoDiffScalar Nested; enum{ RequireInitialization = 1 }; From 589b839ad0aa7248384b346d66724be4cb4c0ab6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 5 Nov 2015 14:54:05 +0100 Subject: [PATCH 178/344] Add unit test for Hessian via AutoDiffScalar --- unsupported/test/autodiff.cpp | 53 +++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp index 087e7c542..1aa1b3d2d 100644 --- a/unsupported/test/autodiff.cpp +++ b/unsupported/test/autodiff.cpp @@ -129,6 +129,7 @@ template void forward_jacobian(const Func& f) // TODO also check actual derivatives! +template void test_autodiff_scalar() { Vector2f p = Vector2f::Random(); @@ -140,6 +141,7 @@ void test_autodiff_scalar() } // TODO also check actual derivatives! 
+template void test_autodiff_vector() { Vector2f p = Vector2f::Random(); @@ -153,6 +155,7 @@ void test_autodiff_vector() VERIFY_IS_APPROX(res.value(), foo(p)); } +template void test_autodiff_jacobian() { CALL_SUBTEST(( forward_jacobian(TestFunc1()) )); @@ -162,12 +165,56 @@ void test_autodiff_jacobian() CALL_SUBTEST(( forward_jacobian(TestFunc1(3,3)) )); } + +template +void test_autodiff_hessian() +{ + typedef AutoDiffScalar AD; + typedef Matrix VectorAD; + typedef AutoDiffScalar ADD; + typedef Matrix VectorADD; + VectorADD x(2); + double s1 = internal::random(), s2 = internal::random(), s3 = internal::random(), s4 = internal::random(); + x(0).value()=s1; + x(1).value()=s2; + + //set unit vectors for the derivative directions (partial derivatives of the input vector) + x(0).derivatives().resize(2); + x(0).derivatives().setZero(); + x(0).derivatives()(0)= 1; + x(1).derivatives().resize(2); + x(1).derivatives().setZero(); + x(1).derivatives()(1)=1; + + //repeat partial derivatives for the inner AutoDiffScalar + x(0).value().derivatives() = VectorXd::Unit(2,0); + x(1).value().derivatives() = VectorXd::Unit(2,1); + + //set the hessian matrix to zero + for(int idx=0; idx<2; idx++) { + x(0).derivatives()(idx).derivatives() = VectorXd::Zero(2); + x(1).derivatives()(idx).derivatives() = VectorXd::Zero(2); + } + + ADD y = sin(AD(s3)*x(0) + AD(s4)*x(1)); + + VERIFY_IS_APPROX(y.value().derivatives()(0), y.derivatives()(0).value()); + VERIFY_IS_APPROX(y.value().derivatives()(1), y.derivatives()(1).value()); + VERIFY_IS_APPROX(y.value().derivatives()(0), s3*std::cos(s1*s3+s2*s4)); + VERIFY_IS_APPROX(y.value().derivatives()(1), s4*std::cos(s1*s3+s2*s4)); + VERIFY_IS_APPROX(y.derivatives()(0).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s3,s4*s3)); + VERIFY_IS_APPROX(y.derivatives()(1).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s4,s4*s4)); +} + + + void test_autodiff() { for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( test_autodiff_scalar() ); - CALL_SUBTEST_2( 
test_autodiff_vector() ); - CALL_SUBTEST_3( test_autodiff_jacobian() ); + CALL_SUBTEST_1( test_autodiff_scalar<1>() ); + CALL_SUBTEST_2( test_autodiff_vector<1>() ); + CALL_SUBTEST_3( test_autodiff_jacobian<1>() ); + CALL_SUBTEST_4( test_autodiff_hessian<1>() ); } } From ec5a81b45aeae230dcc7ec724935e5f9e3124d3f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 5 Nov 2015 13:39:48 -0800 Subject: [PATCH 179/344] Fixed a bug in the extraction of sizes of fixed sized tensors of rank 0 --- .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 145ca0d64..ae02d15a2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -75,8 +75,8 @@ struct fixed_size_tensor_index_extraction_helper static inline Index run(const Index index, const Dimensions& dimensions) { - const Index mult = (index == n) ? 1 : 0; - return array_get(dimensions) * mult + + const Index mult = (index == n-1) ? 1 : 0; + return array_get(dimensions) * mult + fixed_size_tensor_index_extraction_helper::run(index, dimensions); } }; @@ -85,13 +85,12 @@ template struct fixed_size_tensor_index_extraction_helper { template EIGEN_DEVICE_FUNC - static inline Index run(const Index index, - const Dimensions& dimensions) + static inline Index run(const Index, + const Dimensions&) { - const Index mult = (index == 0) ? 
1 : 0; - return array_get<0>(dimensions) * mult; + return 0; } -}; + }; } // end namespace internal @@ -129,7 +128,7 @@ struct Sizes : internal::numeric_list { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const { - return internal::fixed_size_tensor_index_extraction_helper::run(index, *this); + return internal::fixed_size_tensor_index_extraction_helper::run(index, *this); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -386,6 +385,10 @@ static const std::ptrdiff_t value = Sizes::count; template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes&) { return get >::value; } +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) { + eigen_assert(false && "should never be called"); + return -1; +} #else template struct array_size > { static const size_t value = Sizes::count; From c75a19f81525591631670ded0a551e983677db1f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 5 Nov 2015 14:21:20 -0800 Subject: [PATCH 180/344] Misc fixes to full reductions --- unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 1d534f8ae..4233f7341 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -64,10 +64,10 @@ template struct DimInitializer { } }; -template <> struct DimInitializer > { +template <> struct DimInitializer > { template EIGEN_DEVICE_FUNC static void run(const InputDims& input_dims, const array&, - Sizes<0>*, array* reduced_dims) { + Sizes<>*, array* reduced_dims) { const int NumInputDims = internal::array_size::value; for (int i = 0; i < NumInputDims; ++i) { (*reduced_dims)[i] = input_dims[i]; @@ -138,8 +138,8 @@ struct GenericDimReducer<0, Self, Op> { }; template struct GenericDimReducer<-1, Self, 
Op> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::CoeffReturnType*) { - eigen_assert(false && "should never be called"); + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index index, Op& reducer, typename Self::CoeffReturnType* accum) { + reducer.reduce(self.m_impl.coeff(index), accum); } }; @@ -563,7 +563,7 @@ struct TensorEvaluator, Device> static const int NumInputDims = internal::array_size::value; static const int NumReducedDims = internal::array_size::value; static const int NumOutputDims = NumInputDims - NumReducedDims; - typedef typename internal::conditional, DSizes >::type Dimensions; + typedef typename internal::conditional, DSizes >::type Dimensions; typedef typename XprType::Scalar Scalar; typedef TensorEvaluator, Device> Self; static const bool InputPacketAccess = TensorEvaluator::PacketAccess; From 0d15ad80195ec5cd33f057068e34aa7e1dc2b783 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 5 Nov 2015 14:22:30 -0800 Subject: [PATCH 181/344] Updated the regressions tests that cover full reductions --- unsupported/test/cxx11_tensor_argmax.cpp | 16 +-- unsupported/test/cxx11_tensor_index_list.cpp | 16 +-- unsupported/test/cxx11_tensor_map.cpp | 7 +- unsupported/test/cxx11_tensor_reduction.cpp | 111 ++++++++++++------ .../test/cxx11_tensor_reduction_cuda.cpp | 8 +- unsupported/test/cxx11_tensor_sugar.cpp | 2 +- 6 files changed, 102 insertions(+), 58 deletions(-) diff --git a/unsupported/test/cxx11_tensor_argmax.cpp b/unsupported/test/cxx11_tensor_argmax.cpp index 6eeecb717..482dfa7de 100644 --- a/unsupported/test/cxx11_tensor_argmax.cpp +++ b/unsupported/test/cxx11_tensor_argmax.cpp @@ -61,14 +61,14 @@ static void test_argmax_tuple_reducer() Tensor, 4, DataLayout> index_tuples(2,3,5,7); index_tuples = tensor.index_tuples(); - Tensor, 1, DataLayout> reduced(1); + Tensor, 0, DataLayout> reduced; DimensionList dims; reduced = 
index_tuples.reduce( dims, internal::ArgMaxTupleReducer>()); - Tensor maxi = tensor.maximum(); + Tensor maxi = tensor.maximum(); - VERIFY_IS_EQUAL(maxi(0), reduced(0).second); + VERIFY_IS_EQUAL(maxi(), reduced(0).second); array reduce_dims; for (int d = 0; d < 3; ++d) reduce_dims[d] = d; @@ -93,14 +93,14 @@ static void test_argmin_tuple_reducer() Tensor, 4, DataLayout> index_tuples(2,3,5,7); index_tuples = tensor.index_tuples(); - Tensor, 1, DataLayout> reduced(1); + Tensor, 0, DataLayout> reduced; DimensionList dims; reduced = index_tuples.reduce( dims, internal::ArgMinTupleReducer>()); - Tensor mini = tensor.minimum(); + Tensor mini = tensor.minimum(); - VERIFY_IS_EQUAL(mini(0), reduced(0).second); + VERIFY_IS_EQUAL(mini(), reduced(0).second); array reduce_dims; for (int d = 0; d < 3; ++d) reduce_dims[d] = d; @@ -123,7 +123,7 @@ static void test_simple_argmax() tensor = (tensor + tensor.constant(0.5)).log(); tensor(0,0,0,0) = 10.0; - Tensor tensor_argmax(1); + Tensor tensor_argmax; tensor_argmax = tensor.argmax(); @@ -144,7 +144,7 @@ static void test_simple_argmin() tensor = (tensor + tensor.constant(0.5)).log(); tensor(0,0,0,0) = -10.0; - Tensor tensor_argmin(1); + Tensor tensor_argmin; tensor_argmin = tensor.argmin(); diff --git a/unsupported/test/cxx11_tensor_index_list.cpp b/unsupported/test/cxx11_tensor_index_list.cpp index ca9d18254..7100c1628 100644 --- a/unsupported/test/cxx11_tensor_index_list.cpp +++ b/unsupported/test/cxx11_tensor_index_list.cpp @@ -142,7 +142,7 @@ static void test_type2index_list() } const Dims4 reduction_axis4; - Tensor result4 = tensor.sum(reduction_axis4); + Tensor result4 = tensor.sum(reduction_axis4); float expected = 0.0f; for (int m = 0; m < 11; ++m) { for (int l = 0; l < 7; ++l) { @@ -155,7 +155,7 @@ static void test_type2index_list() } } } - VERIFY_IS_APPROX(result4(0), expected); + VERIFY_IS_APPROX(result4(), expected); } @@ -236,9 +236,9 @@ static void test_mixed_index_list() 
EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); #endif - Tensor result1 = tensor.sum(reduction_axis); - Tensor result2 = tensor.sum(reduction_indices); - Tensor result3 = tensor.sum(reduction_list); + Tensor result1 = tensor.sum(reduction_axis); + Tensor result2 = tensor.sum(reduction_indices); + Tensor result3 = tensor.sum(reduction_list); float expected = 0.0f; for (int i = 0; i < 2; ++i) { @@ -250,9 +250,9 @@ static void test_mixed_index_list() } } } - VERIFY_IS_APPROX(result1(0), expected); - VERIFY_IS_APPROX(result2(0), expected); - VERIFY_IS_APPROX(result3(0), expected); + VERIFY_IS_APPROX(result1(), expected); + VERIFY_IS_APPROX(result2(), expected); + VERIFY_IS_APPROX(result3(), expected); } diff --git a/unsupported/test/cxx11_tensor_map.cpp b/unsupported/test/cxx11_tensor_map.cpp index 9e79209bb..a8a095e38 100644 --- a/unsupported/test/cxx11_tensor_map.cpp +++ b/unsupported/test/cxx11_tensor_map.cpp @@ -232,8 +232,11 @@ static void test_from_tensor() static int f(const TensorMap >& tensor) { - Tensor result = tensor.sum(); - return result(0); + // Size<0> empty; + EIGEN_STATIC_ASSERT((internal::array_size>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::array_size>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + Tensor result = tensor.sum(); + return result(); } static void test_casting() diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp index e8180c061..0ec316991 100644 --- a/unsupported/test/cxx11_tensor_reduction.cpp +++ b/unsupported/test/cxx11_tensor_reduction.cpp @@ -13,6 +13,45 @@ using Eigen::Tensor; +template +static void test_trivial_reductions() { + { + Tensor tensor; + tensor.setRandom(); + array reduction_axis; + + Tensor result = tensor.sum(reduction_axis); + VERIFY_IS_EQUAL(result(), tensor()); + } + + { + Tensor tensor(7); + tensor.setRandom(); + array reduction_axis; + + Tensor result = 
tensor.sum(reduction_axis); + VERIFY_IS_EQUAL(result.dimension(0), 7); + for (int i = 0; i < 7; ++i) { + VERIFY_IS_EQUAL(result(i), tensor(i)); + } + } + + { + Tensor tensor(2, 3); + tensor.setRandom(); + array reduction_axis; + + Tensor result = tensor.sum(reduction_axis); + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_EQUAL(result.dimension(1), 3); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(result(i, j), tensor(i, j)); + } + } + } +} + template static void test_simple_reductions() { Tensor tensor(2, 3, 5, 7); @@ -37,18 +76,18 @@ static void test_simple_reductions() { } { - Tensor sum1 = tensor.sum(); - VERIFY_IS_EQUAL(sum1.dimension(0), 1); + Tensor sum1 = tensor.sum(); + VERIFY_IS_EQUAL(sum1.rank(), 0); array reduction_axis4; reduction_axis4[0] = 0; reduction_axis4[1] = 1; reduction_axis4[2] = 2; reduction_axis4[3] = 3; - Tensor sum2 = tensor.sum(reduction_axis4); - VERIFY_IS_EQUAL(sum2.dimension(0), 1); + Tensor sum2 = tensor.sum(reduction_axis4); + VERIFY_IS_EQUAL(sum2.rank(), 0); - VERIFY_IS_APPROX(sum1(0), sum2(0)); + VERIFY_IS_APPROX(sum1(), sum2()); } reduction_axis2[0] = 0; @@ -69,18 +108,18 @@ static void test_simple_reductions() { } { - Tensor prod1 = tensor.prod(); - VERIFY_IS_EQUAL(prod1.dimension(0), 1); + Tensor prod1 = tensor.prod(); + VERIFY_IS_EQUAL(prod1.rank(), 0); array reduction_axis4; reduction_axis4[0] = 0; reduction_axis4[1] = 1; reduction_axis4[2] = 2; reduction_axis4[3] = 3; - Tensor prod2 = tensor.prod(reduction_axis4); - VERIFY_IS_EQUAL(prod2.dimension(0), 1); + Tensor prod2 = tensor.prod(reduction_axis4); + VERIFY_IS_EQUAL(prod2.rank(), 0); - VERIFY_IS_APPROX(prod1(0), prod2(0)); + VERIFY_IS_APPROX(prod1(), prod2()); } reduction_axis2[0] = 0; @@ -101,18 +140,18 @@ static void test_simple_reductions() { } { - Tensor max1 = tensor.maximum(); - VERIFY_IS_EQUAL(max1.dimension(0), 1); + Tensor max1 = tensor.maximum(); + VERIFY_IS_EQUAL(max1.rank(), 0); array reduction_axis4; reduction_axis4[0] 
= 0; reduction_axis4[1] = 1; reduction_axis4[2] = 2; reduction_axis4[3] = 3; - Tensor max2 = tensor.maximum(reduction_axis4); - VERIFY_IS_EQUAL(max2.dimension(0), 1); + Tensor max2 = tensor.maximum(reduction_axis4); + VERIFY_IS_EQUAL(max2.rank(), 0); - VERIFY_IS_APPROX(max1(0), max2(0)); + VERIFY_IS_APPROX(max1(), max2()); } reduction_axis2[0] = 0; @@ -133,18 +172,18 @@ static void test_simple_reductions() { } { - Tensor min1 = tensor.minimum(); - VERIFY_IS_EQUAL(min1.dimension(0), 1); + Tensor min1 = tensor.minimum(); + VERIFY_IS_EQUAL(min1.rank(), 0); array reduction_axis4; reduction_axis4[0] = 0; reduction_axis4[1] = 1; reduction_axis4[2] = 2; reduction_axis4[3] = 3; - Tensor min2 = tensor.minimum(reduction_axis4); - VERIFY_IS_EQUAL(min2.dimension(0), 1); + Tensor min2 = tensor.minimum(reduction_axis4); + VERIFY_IS_EQUAL(min2.rank(), 0); - VERIFY_IS_APPROX(min1(0), min2(0)); + VERIFY_IS_APPROX(min1(), min2()); } reduction_axis2[0] = 0; @@ -167,35 +206,35 @@ static void test_simple_reductions() { } { - Tensor mean1 = tensor.mean(); - VERIFY_IS_EQUAL(mean1.dimension(0), 1); + Tensor mean1 = tensor.mean(); + VERIFY_IS_EQUAL(mean1.rank(), 0); array reduction_axis4; reduction_axis4[0] = 0; reduction_axis4[1] = 1; reduction_axis4[2] = 2; reduction_axis4[3] = 3; - Tensor mean2 = tensor.mean(reduction_axis4); - VERIFY_IS_EQUAL(mean2.dimension(0), 1); + Tensor mean2 = tensor.mean(reduction_axis4); + VERIFY_IS_EQUAL(mean2.rank(), 0); - VERIFY_IS_APPROX(mean1(0), mean2(0)); + VERIFY_IS_APPROX(mean1(), mean2()); } { Tensor ints(10); std::iota(ints.data(), ints.data() + ints.dimension(0), 0); - TensorFixedSize > all; + TensorFixedSize > all; all = ints.all(); - VERIFY(!all(0)); + VERIFY(!all()); all = (ints >= ints.constant(0)).all(); - VERIFY(all(0)); + VERIFY(all()); - TensorFixedSize > any; + TensorFixedSize > any; any = (ints > ints.constant(10)).any(); - VERIFY(!any(0)); + VERIFY(!any()); any = (ints < ints.constant(1)).any(); - VERIFY(any(0)); + VERIFY(any()); } } @@ 
-207,8 +246,8 @@ static void test_full_reductions() { reduction_axis[0] = 0; reduction_axis[1] = 1; - Tensor result = tensor.sum(reduction_axis); - VERIFY_IS_EQUAL(result.dimension(0), 1); + Tensor result = tensor.sum(reduction_axis); + VERIFY_IS_EQUAL(result.rank(), 0); float sum = 0.0f; for (int i = 0; i < 2; ++i) { @@ -219,7 +258,7 @@ static void test_full_reductions() { VERIFY_IS_APPROX(result(0), sum); result = tensor.square().sum(reduction_axis).sqrt(); - VERIFY_IS_EQUAL(result.dimension(0), 1); + VERIFY_IS_EQUAL(result.rank(), 0); sum = 0.0f; for (int i = 0; i < 2; ++i) { @@ -227,7 +266,7 @@ static void test_full_reductions() { sum += tensor(i, j) * tensor(i, j); } } - VERIFY_IS_APPROX(result(0), sqrtf(sum)); + VERIFY_IS_APPROX(result(), sqrtf(sum)); } struct UserReducer { @@ -418,6 +457,8 @@ static void test_reduce_middle_dims() { } void test_cxx11_tensor_reduction() { + CALL_SUBTEST(test_trivial_reductions()); + CALL_SUBTEST(test_trivial_reductions()); CALL_SUBTEST(test_simple_reductions()); CALL_SUBTEST(test_simple_reductions()); CALL_SUBTEST(test_full_reductions()); diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cpp b/unsupported/test/cxx11_tensor_reduction_cuda.cpp index f426ebbc1..9e06eb126 100644 --- a/unsupported/test/cxx11_tensor_reduction_cuda.cpp +++ b/unsupported/test/cxx11_tensor_reduction_cuda.cpp @@ -28,7 +28,7 @@ static void test_full_reductions() { Tensor in(num_rows, num_cols); in.setRandom(); - Tensor full_redux(1); + Tensor full_redux; full_redux = in.sum(); std::size_t in_bytes = in.size() * sizeof(float); @@ -38,16 +38,16 @@ static void test_full_reductions() { gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); TensorMap > in_gpu(gpu_in_ptr, num_rows, num_cols); - TensorMap > out_gpu(gpu_out_ptr, 1); + TensorMap > out_gpu(gpu_out_ptr); out_gpu.device(gpu_device) = in_gpu.sum(); - Tensor full_redux_gpu(1); + Tensor full_redux_gpu; gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); 
gpu_device.synchronize(); // Check that the CPU and GPU reductions return the same result. - VERIFY_IS_APPROX(full_redux(0), full_redux_gpu(0)); + VERIFY_IS_APPROX(full_redux(), full_redux_gpu()); } void test_cxx11_tensor_reduction_cuda() { diff --git a/unsupported/test/cxx11_tensor_sugar.cpp b/unsupported/test/cxx11_tensor_sugar.cpp index 7848acc8b..98671a986 100644 --- a/unsupported/test/cxx11_tensor_sugar.cpp +++ b/unsupported/test/cxx11_tensor_sugar.cpp @@ -14,7 +14,7 @@ static void test_comparison_sugar() { // make sure we have at least one value == 0 t(0,0,0) = 0; - Tensor b; + Tensor b; #define TEST_TENSOR_EQUAL(e1, e2) \ b = ((e1) == (e2)).all(); \ From 29038b982d4e8028b69211ef09e3fc3c7e6dfb69 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 5 Nov 2015 19:39:48 -0800 Subject: [PATCH 182/344] Added support for modulo operation --- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 7 +++++++ .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index b004fdd7d..ceced984b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -172,6 +172,13 @@ class TensorBase return unaryExpr(internal::scalar_quotient1_op(rhs)); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + operator% (Scalar rhs) const { + EIGEN_STATIC_ASSERT(std::numeric_limits::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD); + return unaryExpr(internal::scalar_mod_op(rhs)); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > cwiseMax(Scalar threshold) const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index a98c6a2e3..8153e8ce5 100644 --- 
a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -14,6 +14,20 @@ namespace Eigen { namespace internal { +/** \internal + * \brief Template functor to compute the modulo between an array and a scalar. + */ +template +struct scalar_mod_op { + EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {} + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; } + const Scalar m_divisor; +}; +template +struct functor_traits > +{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; + + /** \internal * \brief Template functor to compute the sigmoid of a scalar * \sa class CwiseUnaryOp, ArrayBase::sigmoid() From bfd6ee64f39340b42a1ee21e4b04ff15433e829d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 6 Nov 2015 15:05:37 +0100 Subject: [PATCH 183/344] bug #1105: fix default preallocation when moving from compressed to uncompressed mode --- Eigen/src/SparseCore/SparseMatrix.h | 2 +- test/sparse_basic.cpp | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 272f1d7b4..91bada40f 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -1221,7 +1221,7 @@ typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Op { // make sure the matrix is compatible to random un-compressed insertion: m_data.resize(m_data.allocatedSize()); - this->reserveInnerVectors(Array::Constant(2*m_outerSize, convert_index(m_outerSize))); + this->reserveInnerVectors(Array::Constant(m_outerSize, 2)); } return insertUncompressed(row,col); diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 2d0f5819f..d803e7dae 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -513,4 +513,19 @@ void test_sparse_basic() // Regression test for bug 900: (manually insert higher values here, if 
you have enough RAM): CALL_SUBTEST_3((big_sparse_triplet >(10000, 10000, 0.125))); CALL_SUBTEST_4((big_sparse_triplet >(10000, 10000, 0.125))); + + // Regression test for bug 1105 +#ifdef EIGEN_TEST_PART_6 + { + int n = Eigen::internal::random(200,600); + SparseMatrix,0, long> mat(n, n); + std::complex val; + + for(int i=0; i Date: Fri, 6 Nov 2015 09:18:43 -0800 Subject: [PATCH 184/344] Reimplement the tensor comparison operators by using the scalar_cmp_op functors. This makes them more cuda friendly. --- Eigen/src/Core/functors/BinaryFunctors.h | 8 ++++++++ Eigen/src/Core/util/Constants.h | 4 +++- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 8 ++++---- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index cc0e80a33..f77066910 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -186,6 +186,14 @@ template struct scalar_cmp_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} }; +template struct scalar_cmp_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;} +}; +template struct scalar_cmp_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;} +}; template struct scalar_cmp_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index c35077af6..28852c8c3 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -531,7 +531,9 @@ enum ComparisonName { cmp_LT = 1, cmp_LE = 2, cmp_UNORD = 3, - cmp_NEQ = 4 + 
cmp_NEQ = 4, + cmp_GT = 5, + cmp_GE = 6 }; } // end namespace internal diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index ceced984b..906687436 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -257,12 +257,12 @@ class TensorBase template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::less()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<=(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::less_equal()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const OtherDerived> @@ -278,12 +278,12 @@ class TensorBase template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator==(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::equal_to()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator!=(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::not_equal_to()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } // comparisons and tests for Scalars From d27e4f1cbaac8e53218a138ab0c58c1adcf07bae Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 6 Nov 2015 09:23:58 -0800 Subject: [PATCH 185/344] Added missing EIGEN_DEVICE_FUNC statements --- Eigen/src/Core/functors/UnaryFunctors.h | 50 ++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git 
a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 2aab9d1ba..62826654f 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -23,7 +23,7 @@ template struct scalar_opposite_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pnegate(a); } }; template @@ -43,7 +43,7 @@ template struct scalar_abs_op { typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pabs(a); } }; template @@ -94,7 +94,7 @@ template struct scalar_abs2_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pmul(a,a); } }; template @@ -111,7 +111,7 @@ template struct scalar_conjugate_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); } }; template struct functor_traits > @@ -132,7 +132,7 @@ template struct scalar_arg_op { typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
const result_type operator() (const Scalar& a) const { using numext::arg; return arg(a); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::parg(a); } }; template @@ -232,7 +232,7 @@ template struct scalar_exp_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } }; template struct functor_traits > @@ -248,7 +248,7 @@ template struct scalar_log_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); } template - inline Packet packetOp(const Packet& a) const { return internal::plog(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); } }; template struct functor_traits > @@ -264,7 +264,7 @@ template struct scalar_log10_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); } template - inline Packet packetOp(const Packet& a) const { return internal::plog10(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); } }; template struct functor_traits > @@ -278,7 +278,7 @@ template struct scalar_sqrt_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); } template - inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } }; template struct functor_traits > @@ -296,7 
+296,7 @@ template struct scalar_rsqrt_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return Scalar(1)/sqrt(a); } template - inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); } }; template @@ -315,7 +315,7 @@ template struct scalar_cos_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } }; template struct functor_traits > @@ -334,7 +334,7 @@ template struct scalar_sin_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); } template - inline Packet packetOp(const Packet& a) const { return internal::psin(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); } }; template struct functor_traits > @@ -354,7 +354,7 @@ template struct scalar_tan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); } template - inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } }; template struct functor_traits > @@ -373,7 +373,7 @@ template struct scalar_acos_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pacos(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& 
a) const { return internal::pacos(a); } }; template struct functor_traits > @@ -392,7 +392,7 @@ template struct scalar_asin_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } }; template struct functor_traits > @@ -411,7 +411,7 @@ template struct scalar_atan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op) inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); } template - inline Packet packetOp(const Packet& a) const { return internal::patan(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); } }; template struct functor_traits > @@ -430,7 +430,7 @@ template struct scalar_tanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); } template - inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); } }; template struct functor_traits > @@ -449,7 +449,7 @@ template struct scalar_sinh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op) inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); } template - inline Packet packetOp(const Packet& a) const { return internal::psinh(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); } }; template struct functor_traits > @@ -468,7 +468,7 @@ template struct scalar_cosh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op) inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); } + 
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); } }; template struct functor_traits > @@ -488,7 +488,7 @@ struct scalar_inverse_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::pdiv(pset1(Scalar(1)),a); } }; template @@ -504,7 +504,7 @@ struct scalar_square_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::pmul(a,a); } }; template @@ -520,7 +520,7 @@ struct scalar_cube_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op) EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::pmul(a,pmul(a,a)); } }; template @@ -535,7 +535,7 @@ template struct scalar_round_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pround(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pround(a); } }; template struct functor_traits > @@ -554,7 +554,7 @@ template struct scalar_floor_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); } template - inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const 
Packet& a) const { return internal::pfloor(a); } }; template struct functor_traits > @@ -573,7 +573,7 @@ template struct scalar_ceil_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); } typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pceil(a); } + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); } }; template struct functor_traits > From 33cbdc2d1556336962d5e96aeb5324da4f2f2ff7 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 6 Nov 2015 09:29:59 -0800 Subject: [PATCH 186/344] Added more missing EIGEN_DEVICE_FUNC --- unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h | 3 +++ unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h | 1 + unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 1 + 3 files changed, 5 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index e9d3437b7..3ca7daf32 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -52,6 +52,7 @@ struct nested, 1, typename eval struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {} @@ -67,6 +68,7 @@ struct PacketConverter { template struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {} @@ -87,6 +89,7 @@ struct PacketConverter { template struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h index 9773afccf..17d89d5e1 100644 --- 
a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -23,6 +23,7 @@ namespace Eigen { */ template struct DimensionList { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const Index operator[] (const Index i) const { return i; } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 8153e8ce5..7ba0c2817 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -96,6 +96,7 @@ template struct MeanReducer static const bool PacketAccess = true; static const bool IsStateful = true; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MeanReducer() : scalarCount_(0), packetCount_(0) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { From 6857a35a11757d59ced3f10b3543a8e869333ed1 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 6 Nov 2015 09:42:05 -0800 Subject: [PATCH 187/344] Fixed typos --- .../Eigen/CXX11/src/Tensor/TensorBase.h | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 906687436..5bf21ab61 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -255,65 +255,65 @@ class TensorBase // Comparisons and tests. 
template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<(const OtherDerived& other) const { return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<=(const OtherDerived& other) const { return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::greater()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>=(const OtherDerived& other) const { - return binaryExpr(other.derived(), std::greater_equal()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator==(const OtherDerived& other) const { return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator!=(const OtherDerived& other) const { return binaryExpr(other.derived(), internal::scalar_cmp_op()); } // comparisons and tests for Scalars EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + 
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator<(Scalar threshold) const { return operator<(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator<=(Scalar threshold) const { return operator<=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator>(Scalar threshold) const { return operator>(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator>=(Scalar threshold) const { return operator>=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator==(Scalar threshold) const { return operator==(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator!=(Scalar threshold) const { return operator!=(constant(threshold)); } From 53432a17b202b7119e9abe357361cc5000ba053c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 6 Nov 2015 10:26:19 -0800 Subject: [PATCH 188/344] Added static assertions to avoid misuses of padding, broadcasting and concatenation ops. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 4 ++++ unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h | 4 +++- unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 5 +++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 24a0df820..c7af02b11 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -99,6 +99,10 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device) { + // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar + // and store the result in a scalar. Instead one should reshape the scalar into a a N-D + // tensor with N >= 1 of 1 element first and then broadcast. + EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); const Broadcast& broadcast = op.broadcast(); for (int i = 0; i < NumDims; ++i) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index fa05cab30..3d153bb94 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -131,7 +131,9 @@ struct TensorEvaluator(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(NumDims == RightNumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) + EIGEN_STATIC_ASSERT(NumDims == RightNumDims, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(0 <= m_axis && m_axis < NumDims); const Dimensions& lhs_dims = m_leftImpl.dimensions(); const Dimensions& rhs_dims = 
m_rightImpl.dimensions(); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index 07a6e8d4c..91e32d200 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -98,6 +98,11 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device), m_padding(op.padding()) { + // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead + // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector + // of 1 element first and then pad. + EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + // Compute dimensions m_dimensions = m_impl.dimensions(); for (int i = 0; i < NumDims; ++i) { From 9fa283339f64224643089740d89c9f35aa3ab2b3 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 6 Nov 2015 11:44:22 -0800 Subject: [PATCH 189/344] Silenced a compilation warning --- unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index d0202559a..04da9a458 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -104,7 +104,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { m_impl.evalSubExprsIfNeeded(NULL); const Index numValues = m_impl.dimensions().TotalSize(); m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); From d573efe303b51317d57754f52a22debfa095dc9f Mon Sep 17 00:00:00 
2001 From: Benoit Steiner Date: Fri, 6 Nov 2015 14:54:28 -0800 Subject: [PATCH 190/344] Code cleanup --- unsupported/Eigen/CXX11/Tensor | 1 + .../Eigen/CXX11/src/Tensor/TensorReduction.h | 181 ------------------ .../CXX11/src/Tensor/TensorReductionCuda.h | 140 ++++++++++++++ 3 files changed, 141 insertions(+), 181 deletions(-) create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 282ea00bb..1e3d2c06a 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -74,6 +74,7 @@ #include "src/Tensor/TensorEvaluator.h" #include "src/Tensor/TensorExpr.h" #include "src/Tensor/TensorReduction.h" +#include "src/Tensor/TensorReductionCuda.h" #include "src/Tensor/TensorArgMax.h" #include "src/Tensor/TensorConcatenation.h" #include "src/Tensor/TensorContraction.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 4233f7341..d4e88fabd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -336,187 +336,6 @@ struct FullReducer { }; #endif - -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) -// Full reducers for GPU, don't vectorize for now - -// Reducer function that enables multiple cuda thread to safely accumulate at the same -// output address. It basically reads the current value of the output variable, and -// attempts to update it with the new value. If in the meantime another cuda thread -// updated the content of the output address it will try again. 
-template -__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { -#if __CUDA_ARCH__ >= 300 - if (sizeof(T) == 4) - { - unsigned int oldval = *reinterpret_cast(output); - unsigned int newval = oldval; - reducer.reduce(accum, reinterpret_cast(&newval)); - if (newval == oldval) { - return; - } - unsigned int readback; - while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { - oldval = readback; - newval = oldval; - reducer.reduce(accum, reinterpret_cast(&newval)); - if (newval == oldval) { - return; - } - } - } - else if (sizeof(T) == 8) { - unsigned long long oldval = *reinterpret_cast(output); - unsigned long long newval = oldval; - reducer.reduce(accum, reinterpret_cast(&newval)); - if (newval == oldval) { - return; - } - unsigned long long readback; - while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { - oldval = readback; - newval = oldval; - reducer.reduce(accum, reinterpret_cast(&newval)); - if (newval == oldval) { - return; - } - } - } - else { - assert(0 && "Wordsize not supported"); - } -#else - assert(0 && "Shouldn't be called on unsupported device"); -#endif -} - -template -__device__ inline void atomicReduce(T* output, T accum, SumReducer&) { -#if __CUDA_ARCH__ >= 300 - atomicAdd(output, accum); -#else - assert(0 && "Shouldn't be called on unsupported device"); -#endif -} - -template -__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, - typename Self::CoeffReturnType* output) { - const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; - - if (first_index == 0) { - *output = reducer.initialize(); - } - - typename Self::CoeffReturnType accum = reducer.initialize(); - for (Index i = 0; i < NumPerThread; ++i) { - const Index index = first_index + i * BlockSize; - if (index >= num_coeffs) { - break; - } - typename Self::CoeffReturnType val = input.m_impl.coeff(index); - reducer.reduce(val, &accum); - } - - for 
(int offset = warpSize/2; offset > 0; offset /= 2) { - reducer.reduce(__shfl_down(accum, offset), &accum); - } - - if ((threadIdx.x & (warpSize - 1)) == 0) { - atomicReduce(output, accum, reducer); - } -} - - -template -struct FullReducer { - // Unfortunately nvidia doesn't support well exotic types such as complex, - // so reduce the scope of the optimized version of the code to the simple case - // of floats. - static const bool HasOptimizedImplementation = !Op::IsStateful && - internal::is_same::value; - - template - static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { - assert(false && "Should only be called on floats"); - } - - static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) { - typedef typename Self::Index Index; - - const Index num_coeffs = array_prod(self.m_impl.dimensions()); - const int block_size = 256; - const int num_per_thread = 128; - const int num_blocks = std::ceil(static_cast(num_coeffs) / (block_size * num_per_thread)); - LAUNCH_CUDA_KERNEL((FullReductionKernel), - num_blocks, block_size, 0, device, reducer, self, num_coeffs, output); - } -}; - -#endif - - -template -class BlockReducer { - public: - typedef typename Self::Index Index; - typedef typename Self::Scalar Scalar; - typedef typename Self::CoeffReturnType CoeffReturnType; - explicit BlockReducer(const Op& reducer) : op_(reducer) { - accum_ = op_.initialize(); - } - void Reduce(Index index, Index num_values_to_reduce, Scalar* data) { - for (Index i = 0; i < num_values_to_reduce; ++i) { - op_.reduce(data[index + i], &accum_); - } - } - CoeffReturnType Finalize() { - return op_.finalize(accum_); - } - - private: - CoeffReturnType accum_; - Op op_; -}; - - -template -class BlockReducer { - public: - typedef typename Self::Index Index; - typedef typename Self::Scalar Scalar; - typedef typename Self::CoeffReturnType CoeffReturnType; - typedef typename Self::PacketReturnType PacketReturnType; - explicit 
BlockReducer(const Op& reducer) : op_(reducer) { - vaccum_ = op_.template initializePacket(); - accum_ = op_.initialize(); - } - void Reduce(Index index, Index num_values_to_reduce, Scalar* data) { - const int packet_size = internal::unpacket_traits::size; - const typename Self::Index vectorized_size = (num_values_to_reduce / - packet_size) * packet_size; - for (typename Self::Index i = 0; i < vectorized_size; i += packet_size) { - op_.reducePacket(internal::ploadt( - &data[index + i]), &vaccum_); - } - - for (typename Self::Index i = vectorized_size; - i < num_values_to_reduce; ++i) { - op_.reduce(data[index + i], &accum_); - } - } - typename Self::CoeffReturnType Finalize() { - return op_.finalizeBoth(accum_, vaccum_); - } - - private: - typename Self::PacketReturnType vaccum_; - typename Self::CoeffReturnType accum_; - Op op_; -}; - } // end namespace internal diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h new file mode 100644 index 000000000..49102fca2 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -0,0 +1,140 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H + +namespace Eigen { +namespace internal { + + +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +// Full reducers for GPU, don't vectorize for now + +// Reducer function that enables multiple cuda thread to safely accumulate at the same +// output address. It basically reads the current value of the output variable, and +// attempts to update it with the new value. 
If in the meantime another cuda thread +// updated the content of the output address it will try again. +template +__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { +#if __CUDA_ARCH__ >= 300 + if (sizeof(T) == 4) + { + unsigned int oldval = *reinterpret_cast(output); + unsigned int newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned int readback; + while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else if (sizeof(T) == 8) { + unsigned long long oldval = *reinterpret_cast(output); + unsigned long long newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned long long readback; + while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else { + assert(0 && "Wordsize not supported"); + } +#else + assert(0 && "Shouldn't be called on unsupported device"); +#endif +} + +template +__device__ inline void atomicReduce(T* output, T accum, SumReducer&) { +#if __CUDA_ARCH__ >= 300 + atomicAdd(output, accum); +#else + assert(0 && "Shouldn't be called on unsupported device"); +#endif +} + +template +__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, + typename Self::CoeffReturnType* output) { + const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; + + if (first_index == 0) { + *output = reducer.initialize(); + } + + typename Self::CoeffReturnType accum = reducer.initialize(); + for (Index i = 0; i < NumPerThread; ++i) { + const Index index = first_index + i * BlockSize; + if (index >= num_coeffs) { + break; + } + 
typename Self::CoeffReturnType val = input.m_impl.coeff(index); + reducer.reduce(val, &accum); + } + + for (int offset = warpSize/2; offset > 0; offset /= 2) { + reducer.reduce(__shfl_down(accum, offset), &accum); + } + + if ((threadIdx.x & (warpSize - 1)) == 0) { + atomicReduce(output, accum, reducer); + } +} + + +template +struct FullReducer { + // Unfortunately nvidia doesn't support well exotic types such as complex, + // so reduce the scope of the optimized version of the code to the simple case + // of floats. + static const bool HasOptimizedImplementation = !Op::IsStateful && + internal::is_same::value; + + template + EIGEN_DEVICE_FUNC static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { + assert(false && "Should only be called on floats"); + } + + EIGEN_DEVICE_FUNC static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) { + typedef typename Self::Index Index; + + const Index num_coeffs = array_prod(self.m_impl.dimensions()); + const int block_size = 256; + const int num_per_thread = 128; + const int num_blocks = std::ceil(static_cast(num_coeffs) / (block_size * num_per_thread)); + LAUNCH_CUDA_KERNEL((FullReductionKernel), + num_blocks, block_size, 0, device, reducer, self, num_coeffs, output); + } +}; + +#endif + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H From 20e2ab1121b0727dadcf886df12ab9e79d0c8c17 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 7 Dec 2015 16:17:57 -0800 Subject: [PATCH 191/344] Fixed another compilation warning --- unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index f49e2b260..eda93a1de 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -647,7 +647,7 @@ struct TensorContractionEvaluatorBase eigen_assert(size == eval_right_dims[right] && "Contraction axes must be same size"); - if (i+1 < internal::array_size::value) { + if (i+1 < static_cast(internal::array_size::value)) { m_k_strides[i+1] = m_k_strides[i] * size; } else { m_k_size = m_k_strides[i] * size; From 03ad4fc50461f7d06f1cd007cdf5c4c49e843fdf Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Dec 2015 11:27:43 +0100 Subject: [PATCH 192/344] Extend unit test of coeff-based product to check many more combinations --- test/product_small.cpp | 163 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) diff --git a/test/product_small.cpp b/test/product_small.cpp index 2a2c1e35f..c35db6f65 100644 --- a/test/product_small.cpp +++ b/test/product_small.cpp @@ -29,6 +29,153 @@ void product1x1() matAdynamic.cwiseProduct(matBdynamic.transpose()).sum() ); } +template +const TC& ref_prod(TC &C, const TA &A, const TB &B) +{ + for(Index i=0;i +typename internal::enable_if::type +test_lazy_single(int rows, int cols, int depth) +{ + Matrix A(rows,depth); A.setRandom(); + Matrix B(depth,cols); B.setRandom(); + Matrix C(rows,cols); C.setRandom(); + Matrix D(C); + VERIFY_IS_APPROX(C+=A.lazyProduct(B), ref_prod(D,A,B)); +} + +template +typename internal::enable_if< ( (Rows ==1&&Depth!=1&&OA==ColMajor) + || (Depth==1&&Rows !=1&&OA==RowMajor) + || (Cols ==1&&Depth!=1&&OB==RowMajor) + || (Depth==1&&Cols !=1&&OB==ColMajor) + || (Rows ==1&&Cols !=1&&OC==ColMajor) + || (Cols ==1&&Rows !=1&&OC==RowMajor)),void>::type +test_lazy_single(int, int, int) +{ +} + +template +void test_lazy_all_layout(int rows=Rows, int cols=Cols, int depth=Depth) +{ + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( 
test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); +} + +template +void test_lazy_l1() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + + // Inner + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(1,1,depth) )); + + // Outer + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(4,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(7,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,cols) )); +} + +template +void test_lazy_l2() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + + // mat-vec + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(4,1,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,1,depth) )); + + // vec-mat + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + 
CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(1,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(1,4,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(1,cols,depth) )); +} + +template +void test_lazy_l3() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + // mat-mat + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(4,3,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,6,depth) )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(8,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(3,4,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(4,cols,depth) )); +} void test_product_small() { @@ -39,6 +186,22 @@ void test_product_small() CALL_SUBTEST_4( product(Matrix4d()) ); CALL_SUBTEST_5( product(Matrix4f()) ); CALL_SUBTEST_6( product1x1() ); + + CALL_SUBTEST_11( test_lazy_l1() ); + CALL_SUBTEST_12( test_lazy_l2() ); 
+ CALL_SUBTEST_13( test_lazy_l3() ); + + CALL_SUBTEST_21( test_lazy_l1() ); + CALL_SUBTEST_22( test_lazy_l2() ); + CALL_SUBTEST_23( test_lazy_l3() ); + + CALL_SUBTEST_31( test_lazy_l1 >() ); + CALL_SUBTEST_32( test_lazy_l2 >() ); + CALL_SUBTEST_33( test_lazy_l3 >() ); + + CALL_SUBTEST_41( test_lazy_l1 >() ); + CALL_SUBTEST_42( test_lazy_l2 >() ); + CALL_SUBTEST_43( test_lazy_l3 >() ); } #ifdef EIGEN_TEST_PART_6 From 543bd28a24314c211c7eb0843fb445309104778e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Dec 2015 11:28:05 +0100 Subject: [PATCH 193/344] Fix Alignment in coeff-based product, and enable unaligned vectorization --- Eigen/src/Core/ProductEvaluators.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index bd1e1c85d..a55af911e 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -447,7 +447,7 @@ struct product_evaluator, ProductTag, DenseShape, MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, PacketSize = packet_traits::size, - + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, CoeffReadCost = InnerSize==0 ? 
NumTraits::ReadCost @@ -463,19 +463,16 @@ struct product_evaluator, ProductTag, DenseShape, LhsAlignment = LhsEtorType::Alignment, RhsAlignment = RhsEtorType::Alignment, - LhsIsAligned = int(LhsAlignment) >= int(unpacket_traits::alignment), - RhsIsAligned = int(RhsAlignment) >= int(unpacket_traits::alignment), - LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, SameType = is_same::value, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ), + && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ), + && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -487,10 +484,13 @@ struct product_evaluator, ProductTag, DenseShape, | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), - Alignment = CanVectorizeLhs ? LhsAlignment - : CanVectorizeRhs ? RhsAlignment + LhsOuterStrideBytes = LhsNestedCleaned::OuterStrideAtCompileTime * sizeof(typename LhsNestedCleaned::Scalar), + RhsOuterStrideBytes = RhsNestedCleaned::OuterStrideAtCompileTime * sizeof(typename RhsNestedCleaned::Scalar), + + Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) : 0, - + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside * of Product. 
If the Product itself is not a packet-access expression, there is still a chance that the inner * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect @@ -500,7 +500,6 @@ struct product_evaluator, ProductTag, DenseShape, && LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) - && (LhsIsAligned && RhsIsAligned) && (InnerSize % packet_traits::size == 0) }; @@ -524,10 +523,9 @@ struct product_evaluator, ProductTag, DenseShape, const PacketType packet(Index row, Index col) const { PacketType res; - typedef etor_product_packet_impl PacketImpl; - PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; } From 45495499925a5396fcc0b0f6d7188d97e84a9491 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Dec 2015 16:21:49 +0100 Subject: [PATCH 194/344] Fix and clarify documentation of Transform wrt operator*(MatrixBase) --- Eigen/src/Geometry/Transform.h | 49 +++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 8c9d7049b..75f20bda6 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -118,15 +118,15 @@ template struct transform_make_affine; * * However, unlike a plain matrix, the Transform class provides many features * simplifying both its assembly and usage. In particular, it can be composed - * with any other transformations (Transform,Translation,RotationBase,Matrix) + * with any other transformations (Transform,Translation,RotationBase,DiagonalMatrix) * and can be directly used to transform implicit homogeneous vectors. All these * operations are handled via the operator*. For the composition of transformations, * its principle consists to first convert the right/left hand sides of the product * to a compatible (Dim+1)^2 matrix and then perform a pure matrix product. 
* Of course, internally, operator* tries to perform the minimal number of operations * according to the nature of each terms. Likewise, when applying the transform - * to non homogeneous vectors, the latters are automatically promoted to homogeneous - * one before doing the matrix product. The convertions to homogeneous representations + * to points, the latters are automatically promoted to homogeneous vectors + * before doing the matrix product. The conventions to homogeneous representations * are performed as follow: * * \b Translation t (Dim)x(1): @@ -140,7 +140,7 @@ template struct transform_make_affine; * R & 0\\ * 0\,...\,0 & 1 * \end{array} \right) \f$ - * + * + * \b Scaling \b DiagonalMatrix S (Dim)x(Dim): + * \f$ \left( \begin{array}{cc} + * S & 0\\ + * 0\,...\,0 & 1 + * \end{array} \right) \f$ * - * \b Column \b vector v (Dim)x(1): + * \b Column \b point v (Dim)x(1): * \f$ \left( \begin{array}{c} * v\\ * 1 * \end{array} \right) \f$ * - * \b Set \b of \b column \b vectors V1...Vn (Dim)x(n): + * \b Set \b of \b column \b points V1...Vn (Dim)x(n): * \f$ \left( \begin{array}{ccc} * v_1 & ... & v_n\\ * 1 & ... & 1 @@ -404,26 +410,39 @@ public: /** \returns a writable expression of the translation vector of the transformation */ inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); } - /** \returns an expression of the product between the transform \c *this and a matrix expression \a other + /** \returns an expression of the product between the transform \c *this and a matrix expression \a other. 
* - * The right hand side \a other might be either: - * \li a vector of size Dim, + * The right-hand-side \a other can be either: * \li an homogeneous vector of size Dim+1, - * \li a set of vectors of size Dim x Dynamic, - * \li a set of homogeneous vectors of size Dim+1 x Dynamic, - * \li a linear transformation matrix of size Dim x Dim, - * \li an affine transformation matrix of size Dim x Dim+1, + * \li a set of homogeneous vectors of size Dim+1 x N, * \li a transformation matrix of size Dim+1 x Dim+1. + * + * Moreover, if \c *this represents an affine transformation (i.e., Mode!=Projective), then \a other can also be: + * \li a point of size Dim (computes: \code this->linear() * other + this->translation()\endcode), + * \li a set of N points as a Dim x N matrix (computes: \code (this->linear() * other).colwise() + this->translation()\endcode), + * + * In all cases, the return type is a matrix or vector of same sizes as the right-hand-side \a other. + * + * If you want to interpret \a other as a linear or affine transformation, then first convert it to a Transform<> type, + * or do your own cooking. 
+ * + * Finally, if you want to apply Affine transformations to vectors, then explicitly apply the linear part only: + * \code + * Affine3f A; + * Vector3f v1, v2; + * v2 = A.linear() * v1; + * \endcode + * */ // note: this function is defined here because some compilers cannot find the respective declaration template - EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl::ResultType + EIGEN_STRONG_INLINE const typename OtherDerived::PlainObject operator * (const EigenBase &other) const { return internal::transform_right_product_impl::run(*this,other.derived()); } /** \returns the product expression of a transformation matrix \a a times a transform \a b * - * The left hand side \a other might be either: + * The left hand side \a other can be either: * \li a linear transformation matrix of size Dim x Dim, * \li an affine transformation matrix of size Dim x Dim+1, * \li a general transformation matrix of size Dim+1 x Dim+1. From 1257fbd2f9a188340c29e88232c4da99b62dcc72 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 9 Dec 2015 10:06:42 +0100 Subject: [PATCH 195/344] Fix sign-unsigned issue in enum --- Eigen/src/Core/ProductEvaluators.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index a55af911e..794038a2a 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -484,8 +484,8 @@ struct product_evaluator, ProductTag, DenseShape, | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | (XprType::IsVectorAtCompileTime ? 
LinearAccessBit : 0), - LhsOuterStrideBytes = LhsNestedCleaned::OuterStrideAtCompileTime * sizeof(typename LhsNestedCleaned::Scalar), - RhsOuterStrideBytes = RhsNestedCleaned::OuterStrideAtCompileTime * sizeof(typename RhsNestedCleaned::Scalar), + LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), + RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) : CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) From dc73430d4b49023ae4a19f9e8b35c64742f528fc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 9 Dec 2015 15:47:08 +0100 Subject: [PATCH 196/344] bug #1074: forbid the creation of PlainObjectBase object by making its ctor protected --- Eigen/src/Core/PlainObjectBase.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 6f1350dc0..1225e85b4 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -449,6 +449,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type return Base::operator=(func); } + // Prevent user from trying to instantiate PlainObjectBase objects + // by making all its constructor protected. See bug 1074. 
+ protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() { @@ -495,17 +499,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } - /** \copydoc MatrixBase::operator=(const EigenBase&) - */ - template - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other) - { - _resize_to_match(other); - Base::operator=(other.derived()); - return this->derived(); - } - /** \sa PlainObjectBase::operator=(const EigenBase&) */ template EIGEN_DEVICE_FUNC @@ -519,7 +512,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type /** \sa PlainObjectBase::operator=(const EigenBase&) */ template - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase &other) : m_storage() { @@ -538,6 +531,19 @@ class PlainObjectBase : public internal::dense_xpr_base::type other.evalTo(this->derived()); } + public: + + /** \copydoc MatrixBase::operator=(const EigenBase&) + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other) + { + _resize_to_match(other); + Base::operator=(other.derived()); + return this->derived(); + } + /** \name Map * These are convenience functions returning Map objects. 
The Map() static functions return unaligned Map objects, * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned From fbe18d550780195dc18feb896c35a98b111bd190 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 9 Dec 2015 15:47:32 +0100 Subject: [PATCH 197/344] Forbid the creation of SparseCompressedBase object --- Eigen/src/SparseCore/SparseCompressedBase.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h index fb795a0ed..c223e4f42 100644 --- a/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/Eigen/src/SparseCore/SparseCompressedBase.h @@ -95,7 +95,12 @@ class SparseCompressedBase /** \returns whether \c *this is in compressed form. */ inline bool isCompressed() const { return innerNonZeroPtr()==0; } - + + protected: + /** Default constructor. Do nothing. */ + SparseCompressedBase() {} + private: + template explicit SparseCompressedBase(const SparseCompressedBase&); }; template From 21ed29e2c9a3d32a2e9ea2ba91eb8f19ee620b0e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 9 Dec 2015 20:46:09 +0100 Subject: [PATCH 198/344] Disable complex scalar types because the compiler might aggressively vectorize the initialization of complex coeffs to 0 before we can check for alignedness --- test/unalignedassert.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp index 014cc834b..e2f03ffca 100644 --- a/test/unalignedassert.cpp +++ b/test/unalignedassert.cpp @@ -157,7 +157,9 @@ void unalignedassert() VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); - VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + // Complexes are disabled because the compiler might aggressively vectorize + // the initialization of complex coeffs to 0 before 
we can check for alignedness + //VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); } for(int b=8; b(b)); if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); if(b<128) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); - if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + //if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); } #endif } From f248249c1f28ce1ab70fea7742998c5b47b67480 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 10 Dec 2015 11:57:57 +0100 Subject: [PATCH 199/344] bug #1113: fix name conflict with C99's "I". --- Eigen/src/Core/SolveTriangular.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index f8bb4b314..0300220ca 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -115,17 +115,17 @@ template struct triangular_solver_unroller { enum { IsLower = ((Mode&Lower)==Lower), - I = IsLower ? Index : Size - Index - 1, - S = IsLower ? 0 : I+1 + RowIndex = IsLower ? Index : Size - Index - 1, + S = IsLower ? 0 : RowIndex+1 }; static void run(const Lhs& lhs, Rhs& rhs) { if (Index>0) - rhs.coeffRef(I) -= lhs.row(I).template segment(S).transpose() + rhs.coeffRef(RowIndex) -= lhs.row(RowIndex).template segment(S).transpose() .cwiseProduct(rhs.template segment(S)).sum(); if(!(Mode & UnitDiag)) - rhs.coeffRef(I) /= lhs.coeff(I,I); + rhs.coeffRef(RowIndex) /= lhs.coeff(RowIndex,RowIndex); triangular_solver_unroller::run(lhs,rhs); } From 75f0fe3795e6e04c338f01ab383b0a3d9a6be334 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 10 Dec 2015 12:01:06 +0100 Subject: [PATCH 200/344] Fix usage of "Index" as a compile time integral. 
--- Eigen/src/Core/SolveTriangular.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 0300220ca..4914f6ba8 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -107,32 +107,32 @@ struct triangular_solver_selector * meta-unrolling implementation ***************************************************************************/ -template +template struct triangular_solver_unroller; -template -struct triangular_solver_unroller { +template +struct triangular_solver_unroller { enum { IsLower = ((Mode&Lower)==Lower), - RowIndex = IsLower ? Index : Size - Index - 1, + RowIndex = IsLower ? LoopIndex : Size - LoopIndex - 1, S = IsLower ? 0 : RowIndex+1 }; static void run(const Lhs& lhs, Rhs& rhs) { - if (Index>0) - rhs.coeffRef(RowIndex) -= lhs.row(RowIndex).template segment(S).transpose() - .cwiseProduct(rhs.template segment(S)).sum(); + if (LoopIndex>0) + rhs.coeffRef(RowIndex) -= lhs.row(RowIndex).template segment(S).transpose() + .cwiseProduct(rhs.template segment(S)).sum(); if(!(Mode & UnitDiag)) rhs.coeffRef(RowIndex) /= lhs.coeff(RowIndex,RowIndex); - triangular_solver_unroller::run(lhs,rhs); + triangular_solver_unroller::run(lhs,rhs); } }; -template -struct triangular_solver_unroller { +template +struct triangular_solver_unroller { static void run(const Lhs&, Rhs&) {} }; From 145ad5d8007f8a81dbe0cbfa9f97b11a32e9fba2 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 10 Dec 2015 12:03:38 +0100 Subject: [PATCH 201/344] Use more explicit names. 
--- Eigen/src/Core/SolveTriangular.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index 4914f6ba8..5a2010449 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -115,17 +115,17 @@ template struct triangular_solver_unroller { enum { IsLower = ((Mode&Lower)==Lower), - RowIndex = IsLower ? LoopIndex : Size - LoopIndex - 1, - S = IsLower ? 0 : RowIndex+1 + DiagIndex = IsLower ? LoopIndex : Size - LoopIndex - 1, + StartIndex = IsLower ? 0 : DiagIndex+1 }; static void run(const Lhs& lhs, Rhs& rhs) { if (LoopIndex>0) - rhs.coeffRef(RowIndex) -= lhs.row(RowIndex).template segment(S).transpose() - .cwiseProduct(rhs.template segment(S)).sum(); + rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment(StartIndex).transpose() + .cwiseProduct(rhs.template segment(StartIndex)).sum(); if(!(Mode & UnitDiag)) - rhs.coeffRef(RowIndex) /= lhs.coeff(RowIndex,RowIndex); + rhs.coeffRef(DiagIndex) /= lhs.coeff(DiagIndex,DiagIndex); triangular_solver_unroller::run(lhs,rhs); } From e73ef4f25e3f82448cce924b0051b8629e8081dc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 10 Dec 2015 14:21:23 +0100 Subject: [PATCH 202/344] bug #1109: use noexcept instead of throw for C++11 compilers --- Eigen/src/Core/CommaInitializer.h | 2 +- Eigen/src/Core/util/Macros.h | 25 +++++++++++++++++++++++-- Eigen/src/Core/util/Memory.h | 16 ++++++++-------- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index dc772277d..89bcd750c 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -106,7 +106,7 @@ struct CommaInitializer EIGEN_DEVICE_FUNC inline ~CommaInitializer() #if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS - throw(Eigen::eigen_assert_exception) + 
EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception) #endif { eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows() diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 8def69610..fcad3694e 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -365,7 +365,7 @@ // Does the compiler support C++11 math? // Let's be conservative and enable the default C++11 implementation only if we are sure it exists #ifndef EIGEN_HAS_CXX11_MATH - #if (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ + #if (__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC) #define EIGEN_HAS_CXX11_MATH 1 #else @@ -375,13 +375,26 @@ // Does the compiler support proper C++11 containers? #ifndef EIGEN_HAS_CXX11_CONTAINERS - #if ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG)) || EIGEN_COMP_MSVC >= 1900 + #if (__cplusplus > 201103L) \ + || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ + || EIGEN_COMP_MSVC >= 1900 #define EIGEN_HAS_CXX11_CONTAINERS 1 #else #define EIGEN_HAS_CXX11_CONTAINERS 0 #endif #endif +// Does the compiler support C++11 noexcept? +#ifndef EIGEN_HAS_CXX11_NOEXCEPT + #if (__cplusplus > 201103L) \ + || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ + || EIGEN_COMP_MSVC >= 1900 + #define EIGEN_HAS_CXX11_NOEXCEPT 1 + #else + #define EIGEN_HAS_CXX11_NOEXCEPT 0 + #endif +#endif + /** Allows to disable some optimizations which might affect the accuracy of the result. * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. 
* They currently include: @@ -842,4 +855,12 @@ namespace Eigen { # define EIGEN_CATCH(X) else #endif +#if EIGEN_HAS_CXX11_NOEXCEPT +# define EIGEN_NO_THROW noexcept(true) +# define EIGEN_EXCEPTION_SPEC(X) noexcept(false) +#else +# define EIGEN_NO_THROW throw() +# define EIGEN_EXCEPTION_SPEC(X) throw(X) +#endif + #endif // EIGEN_MACROS_H diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 69a489d43..1fc535a3a 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -732,7 +732,7 @@ template void swap(scoped_array &a,scoped_array &b) #if EIGEN_MAX_ALIGN_BYTES!=0 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - void* operator new(size_t size, const std::nothrow_t&) throw() { \ + void* operator new(size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc(size); } \ EIGEN_CATCH (...) { return 0; } \ } @@ -743,20 +743,20 @@ template void swap(scoped_array &a,scoped_array &b) void *operator new[](size_t size) { \ return Eigen::internal::conditional_aligned_malloc(size); \ } \ - void operator delete(void * ptr) throw() { Eigen::internal::conditional_aligned_free(ptr); } \ - void operator delete[](void * ptr) throw() { Eigen::internal::conditional_aligned_free(ptr); } \ - void operator delete(void * ptr, std::size_t /* sz */) throw() { Eigen::internal::conditional_aligned_free(ptr); } \ - void operator delete[](void * ptr, std::size_t /* sz */) throw() { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { 
Eigen::internal::conditional_aligned_free(ptr); } \ /* in-place new and delete. since (at least afaik) there is no actual */ \ /* memory allocated we can safely let the default implementation handle */ \ /* this particular case. */ \ static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \ static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \ - void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \ - void operator delete[](void * memory, void *ptr) throw() { return ::operator delete[](memory,ptr); } \ + void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \ + void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \ /* nothrow-new (returns zero instead of std::bad_alloc) */ \ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - void operator delete(void *ptr, const std::nothrow_t&) throw() { \ + void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \ Eigen::internal::conditional_aligned_free(ptr); \ } \ typedef void eigen_aligned_operator_new_marker_type; From b836acb7992dcec05e5ff59f08b0c73656380c3d Mon Sep 17 00:00:00 2001 From: Taylor Braun-Jones Date: Sat, 7 Nov 2015 21:29:24 -0500 Subject: [PATCH 203/344] Further fixes for CMAKE_INSTALL_PREFIX correctness And other related cmake cleanup, including: - Use CMAKE_CURRENT_LIST_DIR to find UseEigen3.cmake - Use INSTALL_DIR term consistently for variable names - Drop unnecessary extra EIGEN_INCLUDE_INSTALL_DIR - Fix some paths in generated eigen3.pc and Eigen3Config.cmake files missing CMAKE_INSTALL_PREFIX - Fix pkgconfig directory choice ignored if it doesn't exist at configure time (bug #711) --- CMakeLists.txt | 78 +++++++++++++++---------------------- cmake/Eigen3Config.cmake.in | 2 +- eigen3.pc.in | 7 +++- 3 files changed, 37 insertions(+), 50 deletions(-) diff --git 
a/CMakeLists.txt b/CMakeLists.txt index aebc6d45d..5240f3039 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ project(Eigen) -cmake_minimum_required(VERSION 2.8.4) +cmake_minimum_required(VERSION 2.8.5) # guard against in-source builds @@ -55,6 +55,7 @@ endif(EIGEN_HG_CHANGESET) include(CheckCXXCompilerFlag) +include(GNUInstallDirs) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) @@ -337,23 +338,23 @@ option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tens include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) -# the user modifiable install path for header files -set(EIGEN_INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} CACHE PATH "The directory where we install the header files (optional)") +set(INCLUDE_INSTALL_DIR + "${CMAKE_INSTALL_INCLUDEDIR}/eigen3" + CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed" + ) +set(CMAKEPACKAGE_INSTALL_DIR + "${CMAKE_INSTALL_LIBDIR}/cmake/eigen3" + CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed" + ) +set(PKGCONFIG_INSTALL_DIR + "${CMAKE_INSTALL_DATADIR}/pkgconfig" + CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where eigen3.pc is installed" + ) -# set the internal install path for header files which depends on wether the user modifiable -# EIGEN_INCLUDE_INSTALL_DIR has been set by the user or not. -if(EIGEN_INCLUDE_INSTALL_DIR) - set(INCLUDE_INSTALL_DIR - ${EIGEN_INCLUDE_INSTALL_DIR} - CACHE INTERNAL - "The directory where we install the header files (internal)" - ) -else() - set(INCLUDE_INSTALL_DIR - "include/eigen3" - CACHE INTERNAL - "The directory where we install the header files (internal)" - ) +# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR +if(DEFINED EIGEN_INCLUDE_INSTALL_DIR) + message(WARNING "EIGEN_INCLUDE_INSTALL_DIR is deprecated. 
Use INCLUDE_INSTALL_DIR instead.") + set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} CACHE PATH "" FORCE) endif() # similar to set_target_properties but append the property instead of overwriting it @@ -373,21 +374,9 @@ install(FILES ) if(EIGEN_BUILD_PKGCONFIG) - SET(path_separator ":") - STRING(REPLACE ${path_separator} ";" pkg_config_libdir_search "$ENV{PKG_CONFIG_LIBDIR}") - message(STATUS "searching for 'pkgconfig' directory in PKG_CONFIG_LIBDIR ( $ENV{PKG_CONFIG_LIBDIR} ), ${CMAKE_INSTALL_PREFIX}/share, and ${CMAKE_INSTALL_PREFIX}/lib") - FIND_PATH(pkg_config_libdir pkgconfig ${pkg_config_libdir_search} ${CMAKE_INSTALL_PREFIX}/share ${CMAKE_INSTALL_PREFIX}/lib ${pkg_config_libdir_search}) - if(pkg_config_libdir) - SET(pkg_config_install_dir ${pkg_config_libdir}) - message(STATUS "found ${pkg_config_libdir}/pkgconfig" ) - else(pkg_config_libdir) - SET(pkg_config_install_dir ${CMAKE_INSTALL_PREFIX}/share) - message(STATUS "pkgconfig not found; installing in ${pkg_config_install_dir}" ) - endif(pkg_config_libdir) - - configure_file(eigen3.pc.in eigen3.pc) + configure_file(eigen3.pc.in eigen3.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc - DESTINATION ${pkg_config_install_dir}/pkgconfig + DESTINATION ${PKGCONFIG_INSTALL_DIR} ) endif(EIGEN_BUILD_PKGCONFIG) @@ -450,12 +439,15 @@ if(cmake_generator_tolower MATCHES "makefile") message(STATUS "--------------+--------------------------------------------------------------") message(STATUS "Command | Description") message(STATUS "--------------+--------------------------------------------------------------") - message(STATUS "make install | Install to ${CMAKE_INSTALL_PREFIX}. To change that:") - message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourpath") - message(STATUS " | Eigen headers will then be installed to:") - message(STATUS " | ${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}") - message(STATUS " | To install Eigen headers to a separate location, do:") - message(STATUS " | cmake . 
-DEIGEN_INCLUDE_INSTALL_DIR=yourpath") + message(STATUS "make install | Install Eigen. Headers will be installed to:") + message(STATUS " | /") + message(STATUS " | Using the following values:") + message(STATUS " | CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") + message(STATUS " | INCLUDE_INSTALL_DIR: ${INCLUDE_INSTALL_DIR}") + message(STATUS " | Change the install location of Eigen headers using:") + message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourprefix") + message(STATUS " | Or:") + message(STATUS " | cmake . -DINCLUDE_INSTALL_DIR=yourdir") message(STATUS "make doc | Generate the API documentation, requires Doxygen & LaTeX") message(STATUS "make check | Build and run the unit-tests. Read this page:") message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests") @@ -469,21 +461,13 @@ endif() message(STATUS "") -set ( EIGEN_CONFIG_CMAKE_PATH - lib${LIB_SUFFIX}/cmake/eigen3 - CACHE PATH "The directory where the CMake files are installed" - ) -if ( NOT IS_ABSOLUTE EIGEN_CONFIG_CMAKE_PATH ) - set ( EIGEN_CONFIG_CMAKE_PATH ${CMAKE_INSTALL_PREFIX}/${EIGEN_CONFIG_CMAKE_PATH} ) -endif () -set ( EIGEN_USE_FILE ${EIGEN_CONFIG_CMAKE_PATH}/UseEigen3.cmake ) set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} ) set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD_VERSION} ) set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} ) set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} ) set ( EIGEN_DEFINITIONS "") -set ( EIGEN_INCLUDE_DIR ${INCLUDE_INSTALL_DIR} ) +set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" ) set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} ) set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} ) @@ -494,7 +478,7 @@ configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake - DESTINATION ${EIGEN_CONFIG_CMAKE_PATH} + DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} ) # Add uninstall target diff --git a/cmake/Eigen3Config.cmake.in 
b/cmake/Eigen3Config.cmake.in index e50f6dbe0..04e7886ce 100644 --- a/cmake/Eigen3Config.cmake.in +++ b/cmake/Eigen3Config.cmake.in @@ -15,7 +15,7 @@ # EIGEN3_VERSION_PATCH - The patch version of Eigen set ( EIGEN3_FOUND 1 ) -set ( EIGEN3_USE_FILE "@EIGEN_USE_FILE@" ) +set ( EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake" ) set ( EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@" ) set ( EIGEN3_INCLUDE_DIR "@EIGEN_INCLUDE_DIR@" ) diff --git a/eigen3.pc.in b/eigen3.pc.in index c5855de33..3368a3aa1 100644 --- a/eigen3.pc.in +++ b/eigen3.pc.in @@ -1,6 +1,9 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} + Name: Eigen3 Description: A C++ template library for linear algebra: vectors, matrices, and related algorithms Requires: -Version: ${EIGEN_VERSION_NUMBER} +Version: @EIGEN_VERSION_NUMBER@ Libs: -Cflags: -I${INCLUDE_INSTALL_DIR} +Cflags: -I${prefix}/@INCLUDE_INSTALL_DIR@ From 228edfe616b03e99b2b777238139d75294e1a1b8 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 11 Nov 2015 09:26:23 -0800 Subject: [PATCH 204/344] Use Eigen::NumTraits instead of std::numeric_limits --- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 5bf21ab61..e7b09e562 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -155,7 +155,7 @@ class TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> operator- (Scalar rhs) const { - EIGEN_STATIC_ASSERT((std::numeric_limits::is_signed || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumTraits::IsSigned || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); return unaryExpr(internal::scalar_sub_op(rhs)); } @@ -168,14 +168,13 @@ class TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const 
Derived> operator/ (Scalar rhs) const { - // EIGEN_STATIC_ASSERT(!std::numeric_limits::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE); return unaryExpr(internal::scalar_quotient1_op(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> operator% (Scalar rhs) const { - EIGEN_STATIC_ASSERT(std::numeric_limits::is_integer, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD); + EIGEN_STATIC_ASSERT(NumTraits::IsInteger, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD); return unaryExpr(internal::scalar_mod_op(rhs)); } From 5cb18e5b5eac0fa18cd91bee117696540d0a063e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 11 Nov 2015 14:36:33 -0800 Subject: [PATCH 205/344] Fixed CUDA compilation errors --- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 95fc9fec6..bc06ca1f0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -229,7 +229,7 @@ inline void TensorExecutor::run(const Expression& /*static*/ template -inline void TensorExecutor::run(const Expression& expr, const GpuDevice& device) +inline void TensorExecutor::run(const Expression& expr, const GpuDevice& device) { TensorEvaluator evaluator(expr, device); const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index d4e88fabd..c1cdb98a4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -336,6 +336,12 @@ struct FullReducer { }; #endif + +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +template +__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); +#endif + 
} // end namespace internal From 99f4778506770d601ffff7b3ca0510585854e74c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 11 Nov 2015 15:04:58 -0800 Subject: [PATCH 206/344] Disable SFINAE when compiling with nvcc --- unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h index 939de5f11..8ed71f838 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h @@ -27,6 +27,7 @@ */ // SFINAE requires variadic templates +#ifndef __CUDACC__ #ifdef EIGEN_HAS_VARIADIC_TEMPLATES // SFINAE doesn't work for gcc <= 4.7 #ifdef EIGEN_COMP_GNUC @@ -36,7 +37,7 @@ #else #define EIGEN_HAS_SFINAE #endif - +#endif #endif #define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \ From 4f471146fbb22dab20896ae2b1c31d0f549cd10e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 11 Nov 2015 15:19:00 -0800 Subject: [PATCH 207/344] Allow the vectorized version of the Binary and the Nullary functors to run on GPU --- Eigen/src/Core/functors/BinaryFunctors.h | 30 +++++++++++------------ Eigen/src/Core/functors/NullaryFunctors.h | 10 ++++---- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index f77066910..90d8b7d1d 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -26,7 +26,7 @@ template struct scalar_sum_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::padd(a,b); } template 
EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const @@ -65,7 +65,7 @@ template struct scalar_product_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmul(a,b); } template EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const @@ -97,7 +97,7 @@ template struct scalar_conj_product_op { { return conj_helper().pmul(a,b); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return conj_helper().pmul(a,b); } }; template @@ -117,10 +117,10 @@ template struct scalar_min_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmin(a,b); } template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const { return internal::predux_min(a); } }; template @@ -140,10 +140,10 @@ template struct scalar_max_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const 
Packet& b) const { return internal::pmax(a,b); } template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const { return internal::predux_max(a); } }; template @@ -260,7 +260,7 @@ template struct scalar_difference_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::psub(a,b); } }; template @@ -285,7 +285,7 @@ template struct scalar_quotient_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pdiv(a,b); } }; template @@ -357,7 +357,7 @@ struct scalar_multiple_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pmul(a, pset1(m_other)); } typename add_const_on_value_type::Nested>::type m_other; }; @@ -392,7 +392,7 @@ struct scalar_quotient1_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet 
packetOp(const Packet& a) const { return internal::pdiv(a, pset1(m_other)); } typename add_const_on_value_type::Nested>::type m_other; }; @@ -434,7 +434,7 @@ struct scalar_add_op { EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { } EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::padd(a, pset1(m_other)); } const Scalar m_other; }; @@ -452,7 +452,7 @@ struct scalar_sub_op { inline scalar_sub_op(const Scalar& other) : m_other(other) { } inline Scalar operator() (const Scalar& a) const { return a - m_other; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::psub(a, pset1(m_other)); } const Scalar m_other; }; @@ -470,7 +470,7 @@ struct scalar_rsub_op { inline scalar_rsub_op(const Scalar& other) : m_other(other) { } inline Scalar operator() (const Scalar& a) const { return m_other - a; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::psub(pset1(m_other), a); } const Scalar m_other; }; @@ -504,7 +504,7 @@ struct scalar_inverse_mult_op { scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; } template - inline const Packet packetOp(const Packet& a) const + EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::pdiv(pset1(m_other),a); } Scalar m_other; }; diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 55d45f26f..cd9fbf267 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -21,7 +21,7 @@ 
struct scalar_constant_op { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; } template - EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1(m_other); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1(m_other); } const Scalar m_other; }; template @@ -63,7 +63,7 @@ struct linspaced_op_impl } template - EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } const Scalar m_low; const Scalar m_step; @@ -85,7 +85,7 @@ struct linspaced_op_impl EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(Scalar(i)),m_interPacket))); } const Scalar m_low; @@ -120,12 +120,12 @@ template struct linspa } template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since // there row==0 and col is used for the actual iteration. 
template - EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const { eigen_assert(col==0 || row==0); return impl.packetOp(col + row); From 7f1c29fb0c26e92f31446926c441e13b0f6aec68 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 11 Nov 2015 15:22:50 -0800 Subject: [PATCH 208/344] Make it possible for a vectorized tensor expression to be executed in a CUDA kernel. --- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index bc06ca1f0..956672771 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -50,6 +50,7 @@ class TensorExecutor { public: typedef typename Expression::Index Index; + EIGEN_DEVICE_FUNC static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice()) { TensorEvaluator evaluator(expr, device); @@ -57,7 +58,7 @@ class TensorExecutor if (needs_assign) { const Index size = array_prod(evaluator.dimensions()); - static const int PacketSize = unpacket_traits::PacketReturnType>::size; + const int PacketSize = unpacket_traits::PacketReturnType>::size; const Index VectorizedSize = (size / PacketSize) * PacketSize; for (Index i = 0; i < VectorizedSize; i += PacketSize) { From c587293e4821c61de45b6fee7ff5c3a4cbc33f1c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 11 Nov 2015 15:35:12 -0800 Subject: [PATCH 209/344] Fixed a compilation warning --- unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index ae02d15a2..114871f49 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -214,7 +214,7 @@ template ::value; default: eigen_assert(false && "index overflow"); - return static_cast(-1); + return static_cast(-1); } } From 9fa10fe52d30525505ca97afd1fce7cda44d27e5 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 11 Nov 2015 15:38:30 -0800 Subject: [PATCH 210/344] Don't use std::array when compiling with nvcc since nvidia doesn't support the use of STL containers on GPU. --- unsupported/Eigen/CXX11/Core | 2 +- unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/Core b/unsupported/Eigen/CXX11/Core index 292f09564..cadfaaff1 100644 --- a/unsupported/Eigen/CXX11/Core +++ b/unsupported/Eigen/CXX11/Core @@ -33,7 +33,7 @@ #include // Emulate the cxx11 functionality that we need if the compiler doesn't support it. -#if __cplusplus <= 199711L +#if __cplusplus <= 199711L || defined(__CUDACC__) #include "src/Core/util/EmulateCXX11Meta.h" #else #include diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h index ecd1bddf1..ce34a7f04 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h @@ -23,7 +23,8 @@ template class array { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } - static const std::size_t size() { return n; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + static std::size_t size() { return n; } T values[n]; @@ -105,7 +106,8 @@ template class array { } #ifdef EIGEN_HAS_VARIADIC_TEMPLATES - array(std::initializer_list l) { + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(std::initializer_list l) { eigen_assert(l.size() == n); internal::smart_copy(l.begin(), l.end(), values); } From e701cb2c7c839dbbcf0982eaa30861d221dd3e19 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: 
Thu, 12 Nov 2015 12:09:19 +0100 Subject: [PATCH 211/344] Update EIGEN_FAST_MATH doc --- Eigen/src/Core/util/Macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index bf894a518..426025150 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -382,7 +382,7 @@ /** Allows to disable some optimizations which might affect the accuracy of the result. * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. * They currently include: - * - single precision ArrayBase::sin() and ArrayBase::cos() when SSE vectorization is enabled. + * - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization. */ #ifndef EIGEN_FAST_MATH #define EIGEN_FAST_MATH 1 From dfbb889fe9e0e8d117d6634a00c2b1c4a7c3fd3b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 12 Nov 2015 12:09:48 +0100 Subject: [PATCH 212/344] Fix missing Dynamic versus HugeCost changes --- Eigen/src/Core/Solve.h | 2 +- Eigen/src/Core/TriangularMatrix.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index cb06028b1..2d163fe2a 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -52,7 +52,7 @@ struct traits > typedef traits BaseTraits; enum { Flags = BaseTraits::Flags & RowMajorBit, - CoeffReadCost = Dynamic + CoeffReadCost = HugeCost }; }; diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 438dd4dc9..099a02ec3 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -804,7 +804,7 @@ EIGEN_DEVICE_FUNC void call_triangular_assignment_loop(const DstXprType& dst, co enum { unroll = DstXprType::SizeAtCompileTime != Dynamic - && SrcEvaluatorType::CoeffReadCost != Dynamic + && SrcEvaluatorType::CoeffReadCost < HugeCost && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= 
EIGEN_UNROLLING_LIMIT }; From aa5f1ca714bacdd2e25772977eac241cafc18162 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 08:30:10 -0800 Subject: [PATCH 213/344] gen_numeric_list takes a size_t, not a int --- unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 3952e733c..785321666 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -110,7 +110,7 @@ namespace internal{ } /** Make an array (for index/dimensions) out of a custom index */ - template + template EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array customIndices2Array(IndexType& idx) { return customIndices2Array(idx, typename gen_numeric_list::type{}); From 1e072424e8de83c671cbbda9e9c6769cf8395462 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 08:57:04 -0800 Subject: [PATCH 214/344] Moved the array code into it's own file. --- unsupported/Eigen/CXX11/Core | 5 +- .../Eigen/CXX11/src/Core/util/CXX11Meta.h | 40 ++-- .../CXX11/src/Core/util/CXX11Workarounds.h | 30 --- .../Eigen/CXX11/src/Core/util/EmulateArray.h | 221 ++++++++++++++++++ .../CXX11/src/Core/util/EmulateCXX11Meta.h | 155 ------------ 5 files changed, 244 insertions(+), 207 deletions(-) create mode 100644 unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h diff --git a/unsupported/Eigen/CXX11/Core b/unsupported/Eigen/CXX11/Core index cadfaaff1..c8dcf7c16 100644 --- a/unsupported/Eigen/CXX11/Core +++ b/unsupported/Eigen/CXX11/Core @@ -32,11 +32,12 @@ #include +#include "src/Core/util/EmulateArray.h" + // Emulate the cxx11 functionality that we need if the compiler doesn't support it. 
-#if __cplusplus <= 199711L || defined(__CUDACC__) +#if __cplusplus <= 199711L #include "src/Core/util/EmulateCXX11Meta.h" #else -#include #include "src/Core/util/CXX11Workarounds.h" #include "src/Core/util/CXX11Meta.h" #endif diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h index 7e4929ff8..4e2630bc2 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h @@ -334,7 +334,7 @@ constexpr inline Array h_array_reverse(Array arr, numeric_list) } template -constexpr inline std::array array_reverse(std::array arr) +constexpr inline array array_reverse(array arr) { return h_array_reverse(arr, typename gen_numeric_list::type()); } @@ -349,7 +349,7 @@ constexpr inline std::array array_reverse(std::array arr) // an infinite loop) template struct h_array_reduce { - constexpr static inline auto run(std::array arr, T identity) -> decltype(Reducer::run(h_array_reduce::run(arr, identity), array_get(arr))) + constexpr static inline auto run(array arr, T identity) -> decltype(Reducer::run(h_array_reduce::run(arr, identity), array_get(arr))) { return Reducer::run(h_array_reduce::run(arr, identity), array_get(arr)); } @@ -358,7 +358,7 @@ struct h_array_reduce { template struct h_array_reduce { - constexpr static inline T run(const std::array& arr, T) + constexpr static inline T run(const array& arr, T) { return array_get<0>(arr); } @@ -367,14 +367,14 @@ struct h_array_reduce template struct h_array_reduce { - constexpr static inline T run(const std::array&, T identity) + constexpr static inline T run(const array&, T identity) { return identity; } }; template -constexpr inline auto array_reduce(const std::array& arr, T identity) -> decltype(h_array_reduce::run(arr, identity)) +constexpr inline auto array_reduce(const array& arr, T identity) -> decltype(h_array_reduce::run(arr, identity)) { return h_array_reduce::run(arr, identity); } @@ -382,13 +382,13 @@ 
constexpr inline auto array_reduce(const std::array& arr, T identity) -> d /* standard array reductions */ template -constexpr inline auto array_sum(const std::array& arr) -> decltype(array_reduce(arr, static_cast(0))) +constexpr inline auto array_sum(const array& arr) -> decltype(array_reduce(arr, static_cast(0))) { return array_reduce(arr, static_cast(0)); } template -constexpr inline auto array_prod(const std::array& arr) -> decltype(array_reduce(arr, static_cast(1))) +constexpr inline auto array_prod(const array& arr) -> decltype(array_reduce(arr, static_cast(1))) { return array_reduce(arr, static_cast(1)); } @@ -404,13 +404,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector& a) { /* zip an array */ template -constexpr inline std::array h_array_zip(std::array a, std::array b, numeric_list) +constexpr inline array h_array_zip(array a, array b, numeric_list) { - return std::array{{ Op::run(array_get(a), array_get(b))... }}; + return array{{ Op::run(array_get(a), array_get(b))... 
}}; } template -constexpr inline std::array array_zip(std::array a, std::array b) +constexpr inline array array_zip(array a, array b) { return h_array_zip(a, b, typename gen_numeric_list::type()); } @@ -418,13 +418,13 @@ constexpr inline std::array array_zip(std::array< /* zip an array and reduce the result */ template -constexpr inline auto h_array_zip_and_reduce(std::array a, std::array b, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(a), array_get(b))...)) +constexpr inline auto h_array_zip_and_reduce(array a, array b, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(a), array_get(b))...)) { return reduce::type...>::run(Op::run(array_get(a), array_get(b))...); } template -constexpr inline auto array_zip_and_reduce(std::array a, std::array b) -> decltype(h_array_zip_and_reduce(a, b, typename gen_numeric_list::type())) +constexpr inline auto array_zip_and_reduce(array a, array b) -> decltype(h_array_zip_and_reduce(a, b, typename gen_numeric_list::type())) { return h_array_zip_and_reduce(a, b, typename gen_numeric_list::type()); } @@ -432,13 +432,13 @@ constexpr inline auto array_zip_and_reduce(std::array a, std::array /* apply stuff to an array */ template -constexpr inline std::array h_array_apply(std::array a, numeric_list) +constexpr inline array h_array_apply(array a, numeric_list) { - return std::array{{ Op::run(array_get(a))... }}; + return array{{ Op::run(array_get(a))... 
}}; } template -constexpr inline std::array array_apply(std::array a) +constexpr inline array array_apply(array a) { return h_array_apply(a, typename gen_numeric_list::type()); } @@ -446,34 +446,34 @@ constexpr inline std::array array_apply(std::array -constexpr inline auto h_array_apply_and_reduce(std::array arr, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(arr))...)) +constexpr inline auto h_array_apply_and_reduce(array arr, numeric_list) -> decltype(reduce::type...>::run(Op::run(array_get(arr))...)) { return reduce::type...>::run(Op::run(array_get(arr))...); } template -constexpr inline auto array_apply_and_reduce(std::array a) -> decltype(h_array_apply_and_reduce(a, typename gen_numeric_list::type())) +constexpr inline auto array_apply_and_reduce(array a) -> decltype(h_array_apply_and_reduce(a, typename gen_numeric_list::type())) { return h_array_apply_and_reduce(a, typename gen_numeric_list::type()); } /* repeat a value n times (and make an array out of it * usage: - * std::array = repeat<16>(42); + * array = repeat<16>(42); */ template struct h_repeat { template - constexpr static inline std::array run(t v, numeric_list) + constexpr static inline array run(t v, numeric_list) { return {{ typename id_numeric::type(v)... 
}}; } }; template -constexpr std::array repeat(t v) { return h_repeat::run(v, typename gen_numeric_list::type()); } +constexpr array repeat(t v) { return h_repeat::run(v, typename gen_numeric_list::type()); } /* instantiate a class by a C-style array */ template diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h index a590cf4e1..b1528aa66 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Workarounds.h @@ -39,46 +39,16 @@ namespace Eigen { -// Use std::array as Eigen array -template using array = std::array; - namespace internal { /* std::get is only constexpr in C++14, not yet in C++11 - * - libstdc++ from version 4.7 onwards has it nevertheless, - * so use that - * - libstdc++ older versions: use _M_instance directly - * - libc++ all versions so far: use __elems_ directly - * - all other libs: use std::get to be portable, but - * this may not be constexpr */ -#if defined(__GLIBCXX__) && __GLIBCXX__ < 20120322 -#define STD_GET_ARR_HACK a._M_instance[I] -#elif defined(_LIBCPP_VERSION) -#define STD_GET_ARR_HACK a.__elems_[I] -#else -#define STD_GET_ARR_HACK std::template get(a) -#endif -template constexpr inline T& array_get(std::array& a) { return (T&) STD_GET_ARR_HACK; } -template constexpr inline T&& array_get(std::array&& a) { return (T&&) STD_GET_ARR_HACK; } -template constexpr inline T const& array_get(std::array const& a) { return (T const&) STD_GET_ARR_HACK; } template constexpr inline T& array_get(std::vector& a) { return a[I]; } template constexpr inline T&& array_get(std::vector&& a) { return a[I]; } template constexpr inline T const& array_get(std::vector const& a) { return a[I]; } -#undef STD_GET_ARR_HACK - -template struct array_size; -template struct array_size > { - static const size_t value = N; -}; -template struct array_size; -template struct array_size > { - static const size_t value = N; -}; - /* 
Suppose you have a template of the form * template struct X; * And you want to specialize it in such a way: diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h new file mode 100644 index 000000000..25d91cc79 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h @@ -0,0 +1,221 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EMULATE_ARRAY_H +#define EIGEN_EMULATE_ARRAY_H + + +namespace Eigen { + +// The array class is only available starting with cxx11. Emulate our own here +// if needed. +// Moreover, CUDA doesn't support the STL containers, so we use our own instead. +#if __cplusplus <= 199711L || defined(__CUDACC__) + +template class array { + public: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& operator[] (size_t index) { return values[index]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + static std::size_t size() { return n; } + + T values[n]; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array() { } + explicit EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v) { + EIGEN_STATIC_ASSERT(n==1, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2) { + EIGEN_STATIC_ASSERT(n==2, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3) { + EIGEN_STATIC_ASSERT(n==3, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + } + EIGEN_DEVICE_FUNC + 
EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, + const T& v4) { + EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5) { + EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6) { + EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6, const T& v7) { + EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + values[6] = v7; + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array( + const T& v1, const T& v2, const T& v3, const T& v4, + const T& v5, const T& v6, const T& v7, const T& v8) { + EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE) + values[0] = v1; + values[1] = v2; + values[2] = v3; + values[3] = v4; + values[4] = v5; + values[5] = v6; + values[6] = v7; + values[7] = v8; + } + +#ifdef EIGEN_HAS_VARIADIC_TEMPLATES + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array(std::initializer_list l) { + eigen_assert(l.size() == n); + internal::smart_copy(l.begin(), l.end(), values); + } +#endif +}; + + +// Specialize array for zero size +template class array { + public: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& operator[] (size_t) { + eigen_assert(false && "Can't index a zero size array"); + return *static_cast(NULL); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& operator[] 
(size_t) const { + eigen_assert(false && "Can't index a zero size array"); + return *static_cast(NULL); + } + + static EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE array() { } + +#ifdef EIGEN_HAS_VARIADIC_TEMPLATES + array(std::initializer_list l) { + eigen_assert(l.size() == 0); + } +#endif +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array& a) { + return a[I]; +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array& a) { + return a[I]; +} + +template struct array_size; +template struct array_size > { + static const size_t value = N; +}; +template struct array_size; +template struct array_size& > { + static const size_t value = N; +}; +template struct array_size; +template struct array_size > { + static const size_t value = N; +}; +template struct array_size; +template struct array_size& > { + static const size_t value = N; +}; + +} + +#else +#include +// The compiler supports c++11, and we're not targetting cuda: use std::array as Eigen array +template using array = std::array; + +namespace internal { +/* std::get is only constexpr in C++14, not yet in C++11 + * - libstdc++ from version 4.7 onwards has it nevertheless, + * so use that + * - libstdc++ older versions: use _M_instance directly + * - libc++ all versions so far: use __elems_ directly + * - all other libs: use std::get to be portable, but + * this may not be constexpr + */ +#if defined(__GLIBCXX__) && __GLIBCXX__ < 20120322 +#define STD_GET_ARR_HACK a._M_instance[I] +#elif defined(_LIBCPP_VERSION) +#define STD_GET_ARR_HACK a.__elems_[I] +#else +#define STD_GET_ARR_HACK std::template get(a) +#endif + +template constexpr inline T& array_get(std::array& a) { return (T&) STD_GET_ARR_HACK; } +template constexpr inline T&& array_get(std::array&& a) { return (T&&) STD_GET_ARR_HACK; } +template constexpr inline T const& array_get(std::array const& a) { return (T const&) STD_GET_ARR_HACK; 
} + +#undef STD_GET_ARR_HACK + +template struct array_size; +template struct array_size > { + static const size_t value = N; +}; +template struct array_size; +template struct array_size > { + static const size_t value = N; +}; +} + +#endif + + +} // end namespace Eigen + + + +#endif // EIGEN_EMULATE_ARRAY_H diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h index ce34a7f04..d685d4f9d 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h @@ -14,136 +14,6 @@ namespace Eigen { -// The array class is only available starting with cxx11. Emulate our own here -// if needed -template class array { - public: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T& operator[] (size_t index) { return values[index]; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - static std::size_t size() { return n; } - - T values[n]; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array() { } - explicit EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v) { - EIGEN_STATIC_ASSERT(n==1, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2) { - EIGEN_STATIC_ASSERT(n==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3) { - EIGEN_STATIC_ASSERT(n==3, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, - const T& v4) { - EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& 
v4, - const T& v5) { - EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6) { - EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6, const T& v7) { - EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - values[6] = v7; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array( - const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6, const T& v7, const T& v8) { - EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - values[6] = v7; - values[7] = v8; - } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(std::initializer_list l) { - eigen_assert(l.size() == n); - internal::smart_copy(l.begin(), l.end(), values); - } -#endif -}; - - -// Specialize array for zero size -template class array { - public: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T& operator[] (size_t) { - eigen_assert(false && "Can't index a zero size array"); - return *static_cast(NULL); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& operator[] (size_t) const { - eigen_assert(false && "Can't index a zero size array"); - return *static_cast(NULL); - } - - static EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array() { } - -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES - array(std::initializer_list l) { - 
eigen_assert(l.size() == 0); - } -#endif -}; - - - namespace internal { /** \internal @@ -329,14 +199,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector& a) { return prod; } -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array& a) { - return a[I]; -} -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array& a) { - return a[I]; -} template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(std::vector& a) { @@ -347,23 +209,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const std::vector& a return a[I]; } -template struct array_size; -template struct array_size > { - static const size_t value = N; -}; -template struct array_size; -template struct array_size& > { - static const size_t value = N; -}; -template struct array_size; -template struct array_size > { - static const size_t value = N; -}; -template struct array_size; -template struct array_size& > { - static const size_t value = N; -}; - struct sum_op { template static inline bool run(A a, B b) { return a + b; } }; From 737d237722c887d5e0841fd5b89e5ef02054a095 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 09:02:59 -0800 Subject: [PATCH 215/344] Made it possible to run some of the CXXMeta functions on a CUDA device. 
--- .../Eigen/CXX11/src/Core/util/CXX11Meta.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h index 4e2630bc2..3f149c6a3 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/CXX11Meta.h @@ -283,11 +283,11 @@ template< /* generic binary operations */ struct sum_op { - template constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; } + template EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; } static constexpr int Identity = 0; }; struct product_op { - template constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; } + template EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; } static constexpr int Identity = 1; }; @@ -349,7 +349,7 @@ constexpr inline array array_reverse(array arr) // an infinite loop) template struct h_array_reduce { - constexpr static inline auto run(array arr, T identity) -> decltype(Reducer::run(h_array_reduce::run(arr, identity), array_get(arr))) + EIGEN_DEVICE_FUNC constexpr static inline auto run(array arr, T identity) -> decltype(Reducer::run(h_array_reduce::run(arr, identity), array_get(arr))) { return Reducer::run(h_array_reduce::run(arr, identity), array_get(arr)); } @@ -358,7 +358,7 @@ struct h_array_reduce { template struct h_array_reduce { - constexpr static inline T run(const array& arr, T) + EIGEN_DEVICE_FUNC constexpr static inline T run(const array& arr, T) { return array_get<0>(arr); } @@ -367,14 +367,14 @@ struct h_array_reduce template struct h_array_reduce { - constexpr static inline T run(const array&, T identity) + EIGEN_DEVICE_FUNC constexpr static inline T run(const array&, T identity) { return identity; } }; template -constexpr inline auto array_reduce(const array& arr, T identity) -> 
decltype(h_array_reduce::run(arr, identity)) +EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array& arr, T identity) -> decltype(h_array_reduce::run(arr, identity)) { return h_array_reduce::run(arr, identity); } @@ -382,13 +382,13 @@ constexpr inline auto array_reduce(const array& arr, T identity) -> declty /* standard array reductions */ template -constexpr inline auto array_sum(const array& arr) -> decltype(array_reduce(arr, static_cast(0))) +EIGEN_DEVICE_FUNC constexpr inline auto array_sum(const array& arr) -> decltype(array_reduce(arr, static_cast(0))) { return array_reduce(arr, static_cast(0)); } template -constexpr inline auto array_prod(const array& arr) -> decltype(array_reduce(arr, static_cast(1))) +EIGEN_DEVICE_FUNC constexpr inline auto array_prod(const array& arr) -> decltype(array_reduce(arr, static_cast(1))) { return array_reduce(arr, static_cast(1)); } From 7a1316fcc564b709b361592c6897591d9747c401 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 11:05:54 -0800 Subject: [PATCH 216/344] Fixed compilation error with xcode. --- .../Eigen/CXX11/src/Core/util/EmulateArray.h | 14 +- .../Eigen/CXX11/src/Tensor/TensorIndexList.h | 184 +++++++++++++----- 2 files changed, 148 insertions(+), 50 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h index 25d91cc79..9a239ef6d 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h @@ -11,13 +11,13 @@ #define EIGEN_EMULATE_ARRAY_H -namespace Eigen { // The array class is only available starting with cxx11. Emulate our own here // if needed. // Moreover, CUDA doesn't support the STL containers, so we use our own instead. 
#if __cplusplus <= 199711L || defined(__CUDACC__) +namespace Eigen { template class array { public: EIGEN_DEVICE_FUNC @@ -171,11 +171,15 @@ template struct array_size& > { static const size_t value = N; }; -} +} // end namespace internal +} // end namespace Eigen #else -#include + // The compiler supports c++11, and we're not targetting cuda: use std::array as Eigen array +#include +namespace Eigen { + template using array = std::array; namespace internal { @@ -209,12 +213,12 @@ template struct array_size; template struct array_size > { static const size_t value = N; }; -} +} // end namespace internal +} // end namespace Eigen #endif -} // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h index 78e1d2bd1..dcd2464f1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -77,38 +77,128 @@ struct is_compile_time_constant& > { static constexpr bool value = true; }; + + + +template +struct IndexTuple; + +template +struct IndexTuple { + constexpr IndexTuple() : head(), others() { } + constexpr IndexTuple(const T& v, const O... o) : head(v), others(o...) 
{ } + + constexpr static int count = 1 + sizeof...(O); + T head; + IndexTuple others; + typedef T Head; + typedef IndexTuple Other; +}; + +template + struct IndexTuple { + constexpr IndexTuple() : head() { } + constexpr IndexTuple(const T& v) : head(v) { } + + constexpr static int count = 1; + T head; + typedef T Head; +}; + + +template +struct IndexTupleExtractor; + +template +struct IndexTupleExtractor { + + typedef typename IndexTupleExtractor::ValType ValType; + + static constexpr ValType& get_val(IndexTuple& val) { + return IndexTupleExtractor::get_val(val.others); + } + + static constexpr const ValType& get_val(const IndexTuple& val) { + return IndexTupleExtractor::get_val(val.others); + } + template + static void set_val(IndexTuple& val, V& new_val) { + IndexTupleExtractor::set_val(val.others, new_val); + } + +}; + + template + struct IndexTupleExtractor<0, T, O...> { + + typedef T ValType; + + static constexpr ValType& get_val(IndexTuple& val) { + return val.head; + } + static constexpr const ValType& get_val(const IndexTuple& val) { + return val.head; + } + template + static void set_val(IndexTuple& val, V& new_val) { + val.head = new_val; + } +}; + + + +template +constexpr typename IndexTupleExtractor::ValType& array_get(IndexTuple& tuple) { + return IndexTupleExtractor::get_val(tuple); +} +template +constexpr const typename IndexTupleExtractor::ValType& array_get(const IndexTuple& tuple) { + return IndexTupleExtractor::get_val(tuple); +} +template + struct array_size > { + static const size_t value = IndexTuple::count; +}; +template + struct array_size > { + static const size_t value = IndexTuple::count; +}; + + + + template struct tuple_coeff { template - static constexpr DenseIndex get(const DenseIndex i, const std::tuple& t) { - return std::get(t) * (i == Idx) + tuple_coeff::get(i, t) * (i != Idx); + static constexpr DenseIndex get(const DenseIndex i, const IndexTuple& t) { + return array_get(t) * (i == Idx) + tuple_coeff::get(i, t) * (i != Idx); } 
template - static void set(const DenseIndex i, std::tuple& t, const DenseIndex value) { + static void set(const DenseIndex i, IndexTuple& t, const DenseIndex value) { if (i == Idx) { - update_value(std::get(t), value); + update_value(array_get(t), value); } else { tuple_coeff::set(i, t, value); } } template - static constexpr bool value_known_statically(const DenseIndex i, const std::tuple& t) { - return ((i == Idx) & is_compile_time_constant >::type>::value) || + static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple& t) { + return ((i == Idx) & is_compile_time_constant::ValType>::value) || tuple_coeff::value_known_statically(i, t); } template - static constexpr bool values_up_to_known_statically(const std::tuple& t) { - return is_compile_time_constant >::type>::value && + static constexpr bool values_up_to_known_statically(const IndexTuple& t) { + return is_compile_time_constant::ValType>::value && tuple_coeff::values_up_to_known_statically(t); } template - static constexpr bool values_up_to_statically_known_to_increase(const std::tuple& t) { - return is_compile_time_constant >::type>::value && - is_compile_time_constant >::type>::value && - std::get(t) > std::get(t) && + static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple& t) { + return is_compile_time_constant::ValType>::value && + is_compile_time_constant::ValType>::value && + array_get(t) > array_get(t) && tuple_coeff::values_up_to_statically_known_to_increase(t); } }; @@ -116,62 +206,66 @@ struct tuple_coeff { template <> struct tuple_coeff<0> { template - static constexpr DenseIndex get(const DenseIndex i, const std::tuple& t) { + static constexpr DenseIndex get(const DenseIndex i, const IndexTuple& t) { // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr - return std::get<0>(t) * (i == 0); + return array_get<0>(t) * (i == 0); } template - static void set(const DenseIndex i, std::tuple& t, const DenseIndex value) { + static void 
set(const DenseIndex i, IndexTuple& t, const DenseIndex value) { eigen_assert (i == 0); - update_value(std::get<0>(t), value); + update_value(array_get<0>(t), value); } template - static constexpr bool value_known_statically(const DenseIndex i, const std::tuple&) { - // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr - return is_compile_time_constant >::type>::value & (i == 0); + static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple&) { + return is_compile_time_constant::ValType>::value & (i == 0); } template - static constexpr bool values_up_to_known_statically(const std::tuple&) { - return is_compile_time_constant >::type>::value; + static constexpr bool values_up_to_known_statically(const IndexTuple&) { + return is_compile_time_constant::ValType>::value; } template - static constexpr bool values_up_to_statically_known_to_increase(const std::tuple&) { + static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple&) { return true; } }; } // namespace internal + template -struct IndexList : std::tuple { + struct IndexList : internal::IndexTuple { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex operator[] (const DenseIndex i) const { - return internal::tuple_coeff >::value-1>::get(i, *this); + return internal::tuple_coeff >::value-1>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex get(const DenseIndex i) const { + return internal::tuple_coeff >::value-1>::get(i, *this); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const DenseIndex value) { - return internal::tuple_coeff >::value-1>::set(i, *this, value); + return internal::tuple_coeff >::value-1>::set(i, *this, value); } - constexpr IndexList(const std::tuple& other) : std::tuple(other) { } - constexpr IndexList() : std::tuple() { } + constexpr IndexList(const internal::IndexTuple& other) : internal::IndexTuple(other) { } + constexpr IndexList(FirstType& first, OtherTypes... 
other) : internal::IndexTuple(first, other...) { } + constexpr IndexList() : internal::IndexTuple() { } constexpr bool value_known_statically(const DenseIndex i) const { - return internal::tuple_coeff >::value-1>::value_known_statically(i, *this); + return internal::tuple_coeff >::value-1>::value_known_statically(i, *this); } constexpr bool all_values_known_statically() const { - return internal::tuple_coeff >::value-1>::values_up_to_known_statically(*this); + return internal::tuple_coeff >::value-1>::values_up_to_known_statically(*this); } constexpr bool values_statically_known_to_increase() const { - return internal::tuple_coeff >::value-1>::values_up_to_statically_known_to_increase(*this); + return internal::tuple_coeff >::value-1>::values_up_to_statically_known_to_increase(*this); } }; template constexpr IndexList make_index_list(FirstType val1, OtherTypes... other_vals) { - return std::make_tuple(val1, other_vals...); + return IndexList(val1, other_vals...); } @@ -186,17 +280,17 @@ template size_t array_prod(const Ind } template struct array_size > { - static const size_t value = std::tuple_size >::value; + static const size_t value = array_size >::value; }; template struct array_size > { - static const size_t value = std::tuple_size >::value; + static const size_t value = array_size >::value; }; -template constexpr DenseIndex array_get(IndexList& a) { - return std::get(a); +template constexpr DenseIndex array_get(IndexList& a) { + return IndexTupleExtractor::get_val(a); } -template constexpr DenseIndex array_get(const IndexList& a) { - return std::get(a); +template constexpr DenseIndex array_get(const IndexList& a) { + return IndexTupleExtractor::get_val(a); } template @@ -273,7 +367,7 @@ template struct index_statically_eq > { constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { return IndexList().value_known_statically(i) & - (IndexList()[i] == value); + (IndexList().get(i) == value); } }; @@ -281,7 +375,7 @@ template struct 
index_statically_eq > { constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { return IndexList().value_known_statically(i) & - (IndexList()[i] == value); + (IndexList().get(i) == value); } }; @@ -296,7 +390,7 @@ template struct index_statically_ne > { constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { return IndexList().value_known_statically(i) & - (IndexList()[i] != value); + (IndexList().get(i) != value); } }; @@ -304,7 +398,7 @@ template struct index_statically_ne > { constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { return IndexList().value_known_statically(i) & - (IndexList()[i] != value); + (IndexList().get(i) != value); } }; @@ -320,7 +414,7 @@ template struct index_statically_gt > { constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { return IndexList().value_known_statically(i) & - (IndexList()[i] > value); + (IndexList().get(i) > value); } }; @@ -328,7 +422,7 @@ template struct index_statically_gt > { constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { return IndexList().value_known_statically(i) & - (IndexList()[i] > value); + (IndexList().get(i) > value); } }; @@ -343,7 +437,7 @@ template struct index_statically_lt > { constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { return IndexList().value_known_statically(i) & - (IndexList()[i] < value); + (IndexList().get(i) < value); } }; @@ -351,7 +445,7 @@ template struct index_statically_lt > { constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { return IndexList().value_known_statically(i) & - (IndexList()[i] < value); + (IndexList().get(i) < value); } }; From e9ecfad7967fd5285846647372897ecdc125f976 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 16:41:14 -0800 Subject: [PATCH 217/344] Started to make the IndexList code compile by more compilers --- .../CXX11/src/Tensor/TensorDimensionList.h | 16 
+++++----- .../Eigen/CXX11/src/Tensor/TensorIndexList.h | 30 +++++++++++-------- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 8 ++--- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h index 17d89d5e1..da7782188 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -72,14 +72,14 @@ struct all_indices_known_statically > { }; template -struct indices_statically_known_to_increase > { - constexpr bool operator() () const { +struct indices_statically_known_to_increase_impl > { + static constexpr bool run() { return true; } }; template -struct indices_statically_known_to_increase > { - constexpr bool operator() () const { +struct indices_statically_known_to_increase_impl > { + static constexpr bool run() { return true; } }; @@ -164,14 +164,14 @@ struct all_indices_known_statically > { }; template -struct indices_statically_known_to_increase > { - EIGEN_ALWAYS_INLINE bool operator() () const { +struct indices_statically_known_to_increase_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { return true; } }; template -struct indices_statically_known_to_increase > { - EIGEN_ALWAYS_INLINE bool operator() () const { +struct indices_statically_known_to_increase_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { return true; } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h index dcd2464f1..69d21d3c1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -336,26 +336,32 @@ struct all_indices_known_statically > }; template -struct indices_statically_known_to_increase { - constexpr bool operator() () const { +struct indices_statically_known_to_increase_impl { + static constexpr bool run() { 
return false; } }; template -struct indices_statically_known_to_increase > { - constexpr bool operator() () const { - return IndexList().values_statically_known_to_increase(); + struct indices_statically_known_to_increase_impl > { + static constexpr bool run() { + return Eigen::IndexList().values_statically_known_to_increase(); } }; template -struct indices_statically_known_to_increase > { - constexpr bool operator() () const { - return IndexList().values_statically_known_to_increase(); + struct indices_statically_known_to_increase_impl > { + static constexpr bool run() { + return Eigen::IndexList().values_statically_known_to_increase(); } }; +template +static constexpr bool indices_statically_known_to_increase() { + return indices_statically_known_to_increase_impl::run(); +} + + template struct index_statically_eq { constexpr bool operator() (DenseIndex, DenseIndex) const { @@ -473,11 +479,9 @@ struct all_indices_known_statically { }; template -struct indices_statically_known_to_increase { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() () const { - return false; - } -}; +static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_known_to_increase() { + return false; +} template struct index_statically_eq { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index c1cdb98a4..4d3e25d87 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -88,14 +88,14 @@ struct preserve_inner_most_dims { #if defined(EIGEN_HAS_CONSTEXPR) && defined(EIGEN_HAS_VARIADIC_TEMPLATES) template struct are_inner_most_dims{ - static const bool tmp1 = indices_statically_known_to_increase()(); + static const bool tmp1 = indices_statically_known_to_increase(); static const bool tmp2 = index_statically_eq()(0, 0); static const bool tmp3 = index_statically_eq()(array_size::value-1, array_size::value-1); static const bool value = tmp1 & 
tmp2 & tmp3; }; template struct are_inner_most_dims{ - static const bool tmp1 = indices_statically_known_to_increase()(); + static const bool tmp1 = indices_statically_known_to_increase(); static const bool tmp2 = index_statically_eq()(0, NumTensorDims - array_size::value); static const bool tmp3 = index_statically_eq()(array_size::value - 1, NumTensorDims - 1); static const bool value = tmp1 & tmp2 & tmp3; @@ -103,14 +103,14 @@ struct are_inner_most_dims{ }; template struct preserve_inner_most_dims{ - static const bool tmp1 = indices_statically_known_to_increase()(); + static const bool tmp1 = indices_statically_known_to_increase(); static const bool tmp2 = index_statically_gt()(0, 0); static const bool value = tmp1 & tmp2; }; template struct preserve_inner_most_dims{ - static const bool tmp1 = indices_statically_known_to_increase()(); + static const bool tmp1 = indices_statically_known_to_increase(); static const bool tmp2 = index_statically_lt()(array_size::value - 1, NumTensorDims - 1); static const bool value = tmp1 & tmp2; }; From 8037826367a7becab046df2a8be08a28806a625f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 17:19:45 -0800 Subject: [PATCH 218/344] Simplified more of the IndexList code. --- Eigen/src/Core/util/Macros.h | 3 +- .../CXX11/src/Tensor/TensorBroadcasting.h | 32 ++--- .../CXX11/src/Tensor/TensorDimensionList.h | 64 +++++----- .../Eigen/CXX11/src/Tensor/TensorIndexList.h | 112 ++++++++++-------- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 12 +- 5 files changed, 121 insertions(+), 102 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 426025150..9540c0330 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -353,9 +353,10 @@ // Does the compiler support const expressions? 
#ifdef __CUDACC__ +#define EIGEN_HAS_CONSTEXPR 1 // Const expressions are not supported regardless of what host compiler is used #elif (defined(__cplusplus) && __cplusplus >= 201402L) || \ - EIGEN_GNUC_AT_LEAST(4,9) + EIGEN_GNUC_AT_LEAST(4,8) #define EIGEN_HAS_CONSTEXPR 1 #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index c7af02b11..dc64959e1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -156,11 +156,11 @@ struct TensorEvaluator, Device> Index inputIndex = 0; for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx < m_impl.dimensions()[i]); inputIndex += idx * m_inputStrides[i]; } else { - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx % m_impl.dimensions()[i] == 0); } else { inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; @@ -168,11 +168,11 @@ struct TensorEvaluator, Device> } index -= idx * m_outputStrides[i]; } - if (internal::index_statically_eq()(0, 1)) { + if (internal::index_statically_eq(0, 1)) { eigen_assert(index < m_impl.dimensions()[0]); inputIndex += index; } else { - if (internal::index_statically_eq()(0, 1)) { + if (internal::index_statically_eq(0, 1)) { eigen_assert(index % m_impl.dimensions()[0] == 0); } else { inputIndex += (index % m_impl.dimensions()[0]); @@ -186,11 +186,11 @@ struct TensorEvaluator, Device> Index inputIndex = 0; for (int i = 0; i < NumDims - 1; ++i) { const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx < m_impl.dimensions()[i]); inputIndex += idx * m_inputStrides[i]; } else { - if (internal::index_statically_eq()(i, 1)) { + if 
(internal::index_statically_eq(i, 1)) { eigen_assert(idx % m_impl.dimensions()[i] == 0); } else { inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; @@ -198,11 +198,11 @@ struct TensorEvaluator, Device> } index -= idx * m_outputStrides[i]; } - if (internal::index_statically_eq()(NumDims-1, 1)) { + if (internal::index_statically_eq(NumDims-1, 1)) { eigen_assert(index < m_impl.dimensions()[NumDims-1]); inputIndex += index; } else { - if (internal::index_statically_eq()(NumDims-1, 1)) { + if (internal::index_statically_eq(NumDims-1, 1)) { eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); } else { inputIndex += (index % m_impl.dimensions()[NumDims-1]); @@ -235,11 +235,11 @@ struct TensorEvaluator, Device> Index inputIndex = 0; for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx < m_impl.dimensions()[i]); inputIndex += idx * m_inputStrides[i]; } else { - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx % m_impl.dimensions()[i] == 0); } else { inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; @@ -248,11 +248,11 @@ struct TensorEvaluator, Device> index -= idx * m_outputStrides[i]; } Index innermostLoc; - if (internal::index_statically_eq()(0, 1)) { + if (internal::index_statically_eq(0, 1)) { eigen_assert(index < m_impl.dimensions()[0]); innermostLoc = index; } else { - if (internal::index_statically_eq()(0, 1)) { + if (internal::index_statically_eq(0, 1)) { eigen_assert(index % m_impl.dimensions()[0] == 0); innermostLoc = 0; } else { @@ -288,11 +288,11 @@ struct TensorEvaluator, Device> Index inputIndex = 0; for (int i = 0; i < NumDims - 1; ++i) { const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx < m_impl.dimensions()[i]); 
inputIndex += idx * m_inputStrides[i]; } else { - if (internal::index_statically_eq()(i, 1)) { + if (internal::index_statically_eq(i, 1)) { eigen_assert(idx % m_impl.dimensions()[i] == 0); } else { inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; @@ -301,11 +301,11 @@ struct TensorEvaluator, Device> index -= idx * m_outputStrides[i]; } Index innermostLoc; - if (internal::index_statically_eq()(NumDims-1, 1)) { + if (internal::index_statically_eq(NumDims-1, 1)) { eigen_assert(index < m_impl.dimensions()[NumDims-1]); innermostLoc = index; } else { - if (internal::index_statically_eq()(NumDims-1, 1)) { + if (internal::index_statically_eq(NumDims-1, 1)) { eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); innermostLoc = 0; } else { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h index da7782188..206808245 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -85,53 +85,53 @@ struct indices_statically_known_to_increase_impl -struct index_statically_eq > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_eq_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i == value; } }; template -struct index_statically_eq > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_eq_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i == value; } }; template -struct index_statically_ne > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_ne_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i != value; } }; template -struct index_statically_ne > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) 
const { +struct index_statically_ne_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i != value; } }; template -struct index_statically_gt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_gt_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i > value; } }; template -struct index_statically_gt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_gt_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i > value; } }; template -struct index_statically_lt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_lt_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i < value; } }; template -struct index_statically_lt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_lt_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i < value; } }; @@ -177,53 +177,53 @@ struct indices_statically_known_to_increase_impl -struct index_statically_eq > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_eq_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) const { return false; } }; template -struct index_statically_eq > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_eq_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) const { return false; } }; template -struct index_statically_ne > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_ne_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE 
bool run() (const DenseIndex, const DenseIndex) const { return false; } }; template -struct index_statically_ne > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_ne_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) const { return false; } }; template -struct index_statically_gt > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_gt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() (const DenseIndex, const DenseIndex) const { return false; } }; template -struct index_statically_gt > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_gt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) const { return false; } }; template -struct index_statically_lt > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_lt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() (const DenseIndex, const DenseIndex) const { return false; } }; template -struct index_statically_lt > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { +struct index_statically_lt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) const { return false; } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h index 69d21d3c1..472fad0da 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -363,98 +363,124 @@ static constexpr bool indices_statically_known_to_increase() { template -struct index_statically_eq { - constexpr bool operator() (DenseIndex, DenseIndex) const { +struct index_statically_eq_impl { + static constexpr bool 
run(DenseIndex, DenseIndex) { return false; } }; template -struct index_statically_eq > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_eq_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) == value); } }; template -struct index_statically_eq > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_eq_impl > { +static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) == value); } }; template -struct index_statically_ne { - constexpr bool operator() (DenseIndex, DenseIndex) const { - return false; +static constexpr bool index_statically_eq(DenseIndex i, DenseIndex value) { + return index_statically_eq_impl::run(i, value); +} + + +template +struct index_statically_ne_impl { + static constexpr bool run(DenseIndex, DenseIndex) { + return false; } }; template -struct index_statically_ne > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_ne_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) != value); } }; template -struct index_statically_ne > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_ne_impl > { +static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) != value); } }; +template +static constexpr bool index_statically_ne(DenseIndex i, DenseIndex value) { + return index_statically_ne_impl::run(i, value); +} + + template -struct index_statically_gt { - constexpr bool operator() (DenseIndex, DenseIndex) const { - return false; +struct index_statically_gt_impl { + static constexpr bool 
run(DenseIndex, DenseIndex) { + return false; } }; template -struct index_statically_gt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_gt_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) > value); } }; template -struct index_statically_gt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_gt_impl > { +static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) > value); } }; template -struct index_statically_lt { - constexpr bool operator() (DenseIndex, DenseIndex) const { - return false; +static constexpr bool index_statically_gt(DenseIndex i, DenseIndex value) { + return index_statically_gt_impl::run(i, value); +} + + + + +template +struct index_statically_lt_impl { + static constexpr bool run(DenseIndex, DenseIndex) { + return false; } }; template -struct index_statically_lt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_lt_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) < value); } }; template -struct index_statically_lt > { - constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { +struct index_statically_lt_impl > { +static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) < value); } }; +template +static constexpr bool index_statically_lt(DenseIndex i, DenseIndex value) { + return index_statically_lt_impl::run(i, value); +} + + } // end namespace internal } // end namespace Eigen @@ -484,32 +510,24 @@ static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_known_to_in } template 
-struct index_statically_eq { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ - return false; - } -}; +static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_eq(DenseIndex, DenseIndex) { + return false; +} template -struct index_statically_ne { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ - return false; - } -}; +static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_ne(DenseIndex, DenseIndex) { + return false; +} template -struct index_statically_gt { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ - return false; - } -}; +static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_gt(DenseIndex, DenseIndex) { + return false; +} template -struct index_statically_lt { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex, DenseIndex) const{ - return false; - } -}; +static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_lt(DenseIndex, DenseIndex) { + return false; +} } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 4d3e25d87..bd15295b8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -89,29 +89,29 @@ struct preserve_inner_most_dims { template struct are_inner_most_dims{ static const bool tmp1 = indices_statically_known_to_increase(); - static const bool tmp2 = index_statically_eq()(0, 0); - static const bool tmp3 = index_statically_eq()(array_size::value-1, array_size::value-1); + static const bool tmp2 = index_statically_eq(0, 0); + static const bool tmp3 = index_statically_eq(array_size::value-1, array_size::value-1); static const bool value = tmp1 & tmp2 & tmp3; }; template struct are_inner_most_dims{ static const bool tmp1 = indices_statically_known_to_increase(); - 
static const bool tmp2 = index_statically_eq()(0, NumTensorDims - array_size::value); - static const bool tmp3 = index_statically_eq()(array_size::value - 1, NumTensorDims - 1); + static const bool tmp2 = index_statically_eq(0, NumTensorDims - array_size::value); + static const bool tmp3 = index_statically_eq(array_size::value - 1, NumTensorDims - 1); static const bool value = tmp1 & tmp2 & tmp3; }; template struct preserve_inner_most_dims{ static const bool tmp1 = indices_statically_known_to_increase(); - static const bool tmp2 = index_statically_gt()(0, 0); + static const bool tmp2 = index_statically_gt(0, 0); static const bool value = tmp1 & tmp2; }; template struct preserve_inner_most_dims{ static const bool tmp1 = indices_statically_known_to_increase(); - static const bool tmp2 = index_statically_lt()(array_size::value - 1, NumTensorDims - 1); + static const bool tmp2 = index_statically_lt(array_size::value - 1, NumTensorDims - 1); static const bool value = tmp1 & tmp2; }; #endif From 150c12e138ab372d7f43ce19b260acd36ede9fc3 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 18:11:56 -0800 Subject: [PATCH 219/344] Completed the IndexList rewrite --- .../CXX11/src/Tensor/TensorDimensionList.h | 16 +++--- .../Eigen/CXX11/src/Tensor/TensorIndexList.h | 50 +++++++++++-------- unsupported/test/cxx11_tensor_index_list.cpp | 34 ++++++------- 3 files changed, 53 insertions(+), 47 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h index 206808245..170dacb98 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -46,27 +46,27 @@ template const Index array_get(c #if defined(EIGEN_HAS_CONSTEXPR) template -struct index_known_statically > { - constexpr bool operator() (const DenseIndex) const { +struct index_known_statically_impl > { + static constexpr bool run(const DenseIndex) { return 
true; } }; template -struct index_known_statically > { - constexpr bool operator() (const DenseIndex) const { +struct index_known_statically_impl > { + static constexpr bool run(const DenseIndex) { return true; } }; template -struct all_indices_known_statically > { - constexpr bool operator() () const { +struct all_indices_known_statically_impl > { + static constexpr bool run() { return true; } }; template -struct all_indices_known_statically > { - constexpr bool operator() () const { +struct all_indices_known_statically_impl > { + static constexpr bool run() { return true; } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h index 472fad0da..859c16ab0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -294,47 +294,57 @@ template constexpr Den } template -struct index_known_statically { - constexpr bool operator() (DenseIndex) const { +struct index_known_statically_impl { + static constexpr bool run(const DenseIndex) { return false; } }; template -struct index_known_statically > { - constexpr bool operator() (const DenseIndex i) const { +struct index_known_statically_impl > { + static constexpr bool run(const DenseIndex i) { return IndexList().value_known_statically(i); } }; template -struct index_known_statically > { - constexpr bool operator() (const DenseIndex i) const { +struct index_known_statically_impl > { + static constexpr bool run(const DenseIndex i) { return IndexList().value_known_statically(i); } }; template -struct all_indices_known_statically { - constexpr bool operator() () const { +static constexpr bool index_known_statically(const DenseIndex i) { + return index_known_statically_impl::run(i); +} + +template +struct all_indices_known_statically_impl { + static constexpr bool run() { return false; } }; template -struct all_indices_known_statically > { - constexpr bool operator() () const { +struct 
all_indices_known_statically_impl > { + static constexpr bool run() { return IndexList().all_values_known_statically(); } }; template -struct all_indices_known_statically > { - constexpr bool operator() () const { +struct all_indices_known_statically_impl > { + static constexpr bool run() { return IndexList().all_values_known_statically(); } }; +template +static constexpr bool all_indices_known_statically() { + return all_indices_known_statically_impl::run(); +} + template struct indices_statically_known_to_increase_impl { static constexpr bool run() { @@ -491,18 +501,14 @@ namespace internal { // No C++11 support template -struct index_known_statically { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() (DenseIndex) const{ - return false; - } -}; +static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool index_known_statically(DenseIndex) { + return false; +} template -struct all_indices_known_statically { - EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool operator() () const { - return false; - } -}; +static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool all_indices_known_statically() { + return false; +} template static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_known_to_increase() { diff --git a/unsupported/test/cxx11_tensor_index_list.cpp b/unsupported/test/cxx11_tensor_index_list.cpp index 7100c1628..4ce8dea20 100644 --- a/unsupported/test/cxx11_tensor_index_list.cpp +++ b/unsupported/test/cxx11_tensor_index_list.cpp @@ -58,11 +58,11 @@ static void test_type2index_list() typedef Eigen::IndexList, Eigen::type2index<1>, Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> Dims4; #if 0 - EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - 
EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); #endif EIGEN_STATIC_ASSERT((internal::are_inner_most_dims::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE); @@ -216,24 +216,24 @@ static void test_mixed_index_list() reduction_indices.set(3, 3); EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_indices) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_indices) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::index_known_statically()(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::index_known_statically()(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::index_statically_eq()(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::index_statically_eq()(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq(2, 2) == true), 
YOU_MADE_A_PROGRAMMING_MISTAKE); #if 0 - EIGEN_STATIC_ASSERT((internal::all_indices_known_statically()() == false), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase()() == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::all_indices_known_statically() == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase() == false), YOU_MADE_A_PROGRAMMING_MISTAKE); #endif typedef IndexList, type2index<1>, type2index<2>, type2index<3>> ReductionList; ReductionList reduction_list; - EIGEN_STATIC_ASSERT((internal::index_statically_eq()(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::index_statically_eq()(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::index_statically_eq()(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::index_statically_eq()(3, 3) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq(3, 3) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); #if 0 - EIGEN_STATIC_ASSERT((internal::all_indices_known_statically()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase()() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::all_indices_known_statically() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); #endif Tensor result1 = tensor.sum(reduction_axis); From e4d45f3440013666eb26860a7912e6d7e594ee6c Mon Sep 17 
00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 18:18:35 -0800 Subject: [PATCH 220/344] Only enable the use of const expression when nvcc is called with the -std=c++11 option --- Eigen/src/Core/util/Macros.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 9540c0330..f4034ebdb 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -353,8 +353,10 @@ // Does the compiler support const expressions? #ifdef __CUDACC__ -#define EIGEN_HAS_CONSTEXPR 1 - // Const expressions are not supported regardless of what host compiler is used +// Const expressions are supported provided that c++11 is enabled +#if __cplusplus > 199711L + #define EIGEN_HAS_CONSTEXPR 1 +#endif #elif (defined(__cplusplus) && __cplusplus >= 201402L) || \ EIGEN_GNUC_AT_LEAST(4,8) #define EIGEN_HAS_CONSTEXPR 1 From be08e8295357ca21eaa2623cfc0cbbdeb231ae37 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 18:37:40 -0800 Subject: [PATCH 221/344] Fixed typos --- .../Eigen/CXX11/src/Tensor/TensorDimensionList.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h index 170dacb98..e3074734b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -138,26 +138,26 @@ struct index_statically_lt_impl > { #else template -struct index_known_statically > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const { +struct index_known_statically_impl > { + static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { return true; } }; template -struct index_known_statically > { - EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const { +struct index_known_statically_impl > { + static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { return true; } }; 
template -struct all_indices_known_statically > { +struct all_indices_known_statically_impl > { EIGEN_ALWAYS_INLINE bool operator() () const { return true; } }; template -struct all_indices_known_statically > { +struct all_indices_known_statically_impl > { EIGEN_ALWAYS_INLINE bool operator() () const { return true; } From 2c73633b2806357ddab4be2599c68a1759872f1f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 18:39:19 -0800 Subject: [PATCH 222/344] Fixed a few more typos --- unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h index e3074734b..b7adc3bcc 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -152,13 +152,13 @@ struct index_known_statically_impl > { template struct all_indices_known_statically_impl > { - EIGEN_ALWAYS_INLINE bool operator() () const { + static EIGEN_ALWAYS_INLINE bool run() { return true; } }; template struct all_indices_known_statically_impl > { - EIGEN_ALWAYS_INLINE bool operator() () const { + static EIGEN_ALWAYS_INLINE bool run() { return true; } }; From 0aaa5941dfe1292d19249f177cb289d8b37595af Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 19:11:43 -0800 Subject: [PATCH 223/344] Silenced some compilation warnings triggered by nvcc --- .../CXX11/src/Tensor/TensorDimensionList.h | 48 +++--- .../Eigen/CXX11/src/Tensor/TensorIndexList.h | 159 ++++++++++-------- 2 files changed, 113 insertions(+), 94 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h index b7adc3bcc..ca9ac79df 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -47,39 +47,39 @@ template 
const Index array_get(c #if defined(EIGEN_HAS_CONSTEXPR) template struct index_known_statically_impl > { - static constexpr bool run(const DenseIndex) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { return true; } }; template struct index_known_statically_impl > { - static constexpr bool run(const DenseIndex) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { return true; } }; template struct all_indices_known_statically_impl > { - static constexpr bool run() { + EIGEN_DEVICE_FUNC static constexpr bool run() { return true; } }; template struct all_indices_known_statically_impl > { - static constexpr bool run() { + EIGEN_DEVICE_FUNC static constexpr bool run() { return true; } }; template struct indices_statically_known_to_increase_impl > { - static constexpr bool run() { + EIGEN_DEVICE_FUNC static constexpr bool run() { return true; } }; template struct indices_statically_known_to_increase_impl > { - static constexpr bool run() { + EIGEN_DEVICE_FUNC static constexpr bool run() { return true; } }; @@ -92,14 +92,14 @@ struct index_statically_eq_impl > { }; template struct index_statically_eq_impl > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i == value; } }; template struct index_statically_ne_impl > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i != value; } }; @@ -112,26 +112,26 @@ struct index_statically_ne_impl > { template struct index_statically_gt_impl > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i > value; } }; template struct index_statically_gt_impl > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { + 
EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i > value; } }; template struct index_statically_lt_impl > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i < value; } }; template struct index_statically_lt_impl > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return i < value; } }; @@ -139,26 +139,26 @@ struct index_statically_lt_impl > { #else template struct index_known_statically_impl > { - static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { return true; } }; template struct index_known_statically_impl > { - static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { return true; } }; template struct all_indices_known_statically_impl > { - static EIGEN_ALWAYS_INLINE bool run() { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { return true; } }; template struct all_indices_known_statically_impl > { - static EIGEN_ALWAYS_INLINE bool run() { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { return true; } }; @@ -178,52 +178,52 @@ struct indices_statically_known_to_increase_impl struct index_statically_eq_impl > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) const { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template struct index_statically_eq_impl > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) const { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template struct 
index_statically_ne_impl > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() (const DenseIndex, const DenseIndex) const { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex){ return false; } }; template struct index_statically_ne_impl > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) const { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template struct index_statically_gt_impl > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() (const DenseIndex, const DenseIndex) const { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template struct index_statically_gt_impl > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) const { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template struct index_statically_lt_impl > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() (const DenseIndex, const DenseIndex) const { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; template struct index_statically_lt_impl > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) const { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { return false; } }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h index 859c16ab0..74ce6d0ec 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -314,10 +314,6 @@ struct index_known_statically_impl > { } }; -template -static constexpr bool index_known_statically(const DenseIndex i) { - return 
index_known_statically_impl::run(i); -} template struct all_indices_known_statically_impl { @@ -340,10 +336,6 @@ struct all_indices_known_statically_impl -static constexpr bool all_indices_known_statically() { - return all_indices_known_statically_impl::run(); -} template struct indices_statically_known_to_increase_impl { @@ -366,22 +358,17 @@ template } }; -template -static constexpr bool indices_statically_known_to_increase() { - return indices_statically_known_to_increase_impl::run(); -} - template struct index_statically_eq_impl { - static constexpr bool run(DenseIndex, DenseIndex) { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { return false; } }; template struct index_statically_eq_impl > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) == value); } @@ -389,28 +376,23 @@ struct index_statically_eq_impl > { template struct index_statically_eq_impl > { -static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) == value); } }; -template -static constexpr bool index_statically_eq(DenseIndex i, DenseIndex value) { - return index_statically_eq_impl::run(i, value); -} - template struct index_statically_ne_impl { - static constexpr bool run(DenseIndex, DenseIndex) { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { return false; } }; template struct index_statically_ne_impl > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) != value); } @@ -418,29 +400,23 @@ struct 
index_statically_ne_impl > { template struct index_statically_ne_impl > { -static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) != value); } }; -template -static constexpr bool index_statically_ne(DenseIndex i, DenseIndex value) { - return index_statically_ne_impl::run(i, value); -} - - template struct index_statically_gt_impl { - static constexpr bool run(DenseIndex, DenseIndex) { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { return false; } }; template struct index_statically_gt_impl > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) > value); } @@ -448,30 +424,24 @@ struct index_statically_gt_impl > { template struct index_statically_gt_impl > { -static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) > value); } }; -template -static constexpr bool index_statically_gt(DenseIndex i, DenseIndex value) { - return index_statically_gt_impl::run(i, value); -} - - template struct index_statically_lt_impl { - static constexpr bool run(DenseIndex, DenseIndex) { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { return false; } }; template struct index_statically_lt_impl > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) < value); } @@ -479,18 +449,12 @@ struct index_statically_lt_impl > { template struct 
index_statically_lt_impl > { -static constexpr bool run(const DenseIndex i, const DenseIndex value) { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { return IndexList().value_known_statically(i) & (IndexList().get(i) < value); } }; -template -static constexpr bool index_statically_lt(DenseIndex i, DenseIndex value) { - return index_statically_lt_impl::run(i, value); -} - - } // end namespace internal } // end namespace Eigen @@ -499,45 +463,100 @@ static constexpr bool index_statically_lt(DenseIndex i, DenseIndex value) { namespace Eigen { namespace internal { -// No C++11 support template -static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool index_known_statically(DenseIndex) { - return false; -} +struct index_known_statically_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const DenseIndex) { + return false; + } +}; template -static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool all_indices_known_statically() { - return false; -} +struct all_indices_known_statically_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run() { + return false; + } +}; template -static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_known_to_increase() { - return false; -} +struct indices_statically_known_to_increase_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run() { + return false; + } +}; template -static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_eq(DenseIndex, DenseIndex) { - return false; -} +struct index_statically_eq_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { + return false; + } +}; template -static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_ne(DenseIndex, DenseIndex) { - return false; -} +struct index_statically_ne_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { + return false; + } +}; template -static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool 
indices_statically_gt(DenseIndex, DenseIndex) { - return false; -} +struct index_statically_gt_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { + return false; + } +}; template -static EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC bool indices_statically_lt(DenseIndex, DenseIndex) { - return false; -} +struct index_statically_lt_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(DenseIndex, DenseIndex) { + return false; + } +}; } // end namespace internal } // end namespace Eigen #endif + +namespace Eigen { +namespace internal { +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_known_statically(DenseIndex i) { + return index_known_statically_impl::run(i); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool all_indices_known_statically() { + return all_indices_known_statically_impl::run(); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool indices_statically_known_to_increase() { + return indices_statically_known_to_increase_impl::run(); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_eq(DenseIndex i, DenseIndex value) { + return index_statically_eq_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_ne(DenseIndex i, DenseIndex value) { + return index_statically_ne_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_gt(DenseIndex i, DenseIndex value) { + return index_statically_gt_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_lt(DenseIndex i, DenseIndex value) { + return index_statically_lt_impl::run(i, value); +} + +} // end namespace internal +} // end namespace Eigen + + #endif // EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H From b69248fa2af444df769309a632a1e95fb79e3689 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 20:01:50 -0800 Subject: [PATCH 224/344] Added a couple of missing 
EIGEN_DEVICE_FUNC --- unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 04da9a458..65fd25a2e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -121,7 +121,7 @@ struct TensorEvaluator, Device> m_impl.cleanup(); return true; } - EIGEN_STRONG_INLINE void cleanup() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_device.deallocate(m_buffer); m_buffer = NULL; } @@ -132,7 +132,7 @@ struct TensorEvaluator, Device> } template - EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { return internal::ploadt(m_buffer + index); } From ed4b37de0250412516841cf2ded3f90037d6b6d6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 20:08:01 -0800 Subject: [PATCH 225/344] Fixed a few compilation warnings --- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h index 300ee8ac0..71fd4a6af 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h @@ -296,6 +296,7 @@ static void initializeDeviceProp() { if (!m_devicePropInitialized) { int num_devices; cudaError_t status = cudaGetDeviceCount(&num_devices); + EIGEN_UNUSED_VARIABLE(status) assert(status == cudaSuccess); m_deviceProperties = new cudaDeviceProp[num_devices]; for (int i = 0; i < num_devices; ++i) { @@ -331,6 +332,7 @@ class CudaStreamDevice : public StreamInterface { } else { int num_devices; cudaError_t err = cudaGetDeviceCount(&num_devices); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); 
assert(device < num_devices); device_ = device; @@ -344,6 +346,7 @@ class CudaStreamDevice : public StreamInterface { } virtual void* allocate(size_t num_bytes) const { cudaError_t err = cudaSetDevice(device_); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); void* result; err = cudaMalloc(&result, num_bytes); @@ -353,6 +356,7 @@ class CudaStreamDevice : public StreamInterface { } virtual void deallocate(void* buffer) const { cudaError_t err = cudaSetDevice(device_); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); assert(buffer != NULL); err = cudaFree(buffer); @@ -399,6 +403,7 @@ struct GpuDevice { #ifndef __CUDA_ARCH__ cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else eigen_assert(false && "The default device should be used instead to generate kernel code"); @@ -409,6 +414,7 @@ struct GpuDevice { #ifndef __CUDA_ARCH__ cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else eigen_assert(false && "The default device should be used instead to generate kernel code"); @@ -419,6 +425,7 @@ struct GpuDevice { #ifndef __CUDA_ARCH__ cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else eigen_assert(false && "The default device should be used instead to generate kernel code"); @@ -428,6 +435,7 @@ struct GpuDevice { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { #ifndef __CUDA_ARCH__ cudaError_t err = cudaMemsetAsync(buffer, c, n, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else eigen_assert(false && "The default device should be used instead to generate kernel code"); @@ -453,6 +461,7 @@ struct GpuDevice { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const { #if 
defined(__CUDACC__) && !defined(__CUDA_ARCH__) cudaError_t err = cudaStreamSynchronize(stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) assert(err == cudaSuccess); #else assert(false && "The default device should be used instead to generate kernel code"); @@ -501,6 +510,7 @@ struct GpuDevice { #ifdef __CUDACC__ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { cudaError_t status = cudaDeviceSetSharedMemConfig(config); + EIGEN_UNUSED_VARIABLE(status) assert(status == cudaSuccess); } #endif From 10a91930cc90cf0c8f3053d74bd101e92d08a331 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 20:10:52 -0800 Subject: [PATCH 226/344] Fixed a compilation warning triggered by nvcc --- unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h index ee3bf7fe3..d4f9a725d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -216,9 +216,10 @@ struct TensorEvaluator, Devi : m_orig_impl(op.expression(), device), m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device), m_return_dim(op.return_dim()), - m_strides(gen_strides(m_orig_impl.dimensions())), m_stride_mod(gen_stride_mod(m_orig_impl.dimensions())), - m_stride_div(gen_stride_div()) { } + m_stride_div(gen_stride_div()) { + gen_strides(m_orig_impl.dimensions(), m_strides); + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); @@ -240,9 +241,10 @@ struct TensorEvaluator, Devi EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } private: - EIGEN_DEVICE_FUNC StrideDims gen_strides(const InputDimensions& dims) { - StrideDims strides; - if (m_return_dim < 0) return strides; // Won't be using these. 
+ EIGEN_DEVICE_FUNC void gen_strides(const InputDimensions& dims, StrideDims& strides) { + if (m_return_dim < 0) { + return; // Won't be using the strides. + } eigen_assert(m_return_dim < NumDims && "Asking to convert index to a dimension outside of the rank"); @@ -259,7 +261,6 @@ struct TensorEvaluator, Devi strides[i] = strides[i+1] * dims[i+1]; } } - return strides; } EIGEN_DEVICE_FUNC Index gen_stride_mod(const InputDimensions& dims) { @@ -278,7 +279,7 @@ struct TensorEvaluator, Devi TensorEvaluator, Device> m_orig_impl; TensorEvaluator >, Device> m_impl; const int m_return_dim; - const StrideDims m_strides; + StrideDims m_strides; const Index m_stride_mod; const Index m_stride_div; }; From 7815b84be4e9bdb7473a64d0e4c35ec6430669ec Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 20:16:59 -0800 Subject: [PATCH 227/344] Fixed a compilation warning --- unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h index f6bd949bd..6a81d3c71 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -1227,9 +1227,9 @@ struct TensorEvaluator::type EvalLeftArgType; + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; typedef typename internal::conditional< - Layout == ColMajor, RightArgType, LeftArgType>::type EvalRightArgType; + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; static const int LDims = internal::array_size::Dimensions>::value; From 1e1755352d02dd0abcd1955bc126f50d42ca60ba Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 12 Nov 2015 20:19:38 -0800 Subject: [PATCH 228/344] Made it possible to compute atan, tanh, sinh and cosh on GPU --- 
Eigen/src/Core/functors/UnaryFunctors.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 62826654f..c897046bd 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -409,7 +409,7 @@ struct functor_traits > */ template struct scalar_atan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op) - inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); } }; @@ -428,7 +428,7 @@ struct functor_traits > */ template struct scalar_tanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) - inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); } }; @@ -447,7 +447,7 @@ struct functor_traits > */ template struct scalar_sinh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op) - inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); } }; @@ -466,7 +466,7 @@ struct functor_traits > */ template struct scalar_cosh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op) - inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); } template EIGEN_DEVICE_FUNC inline Packet 
packetOp(const Packet& a) const { return internal::pcosh(a); } }; From f1fbd74db9d7c349e6612250d5dd1863c24e8495 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 13 Nov 2015 09:07:27 -0800 Subject: [PATCH 229/344] Added sanity check --- unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index e092c0e04..10328c61f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -66,7 +66,7 @@ class TensorReverseOp : public TensorBase, Device const Device& device) : m_impl(op.expression(), device), m_reverse(op.reverse()) { + // Reversing a scalar isn't supported yet. It would be a no-op anyway. + EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + // Compute strides m_dimensions = m_impl.dimensions(); if (static_cast(Layout) == static_cast(ColMajor)) { From bf792f59e3bbfc3b16e9807257a57120f5c5ff04 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 13 Nov 2015 12:24:22 -0800 Subject: [PATCH 230/344] Only enable the use of constexpr with nvcc if we're using version 7.5 or above --- Eigen/src/Core/util/Macros.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index f4034ebdb..bc81c1e82 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -353,8 +353,8 @@ // Does the compiler support const expressions? 
#ifdef __CUDACC__ -// Const expressions are supported provided that c++11 is enabled -#if __cplusplus > 199711L +// Const expressions are supported provided that c++11 is enabled and we're using nvcc 7.5 or above +#if defined(__CUDACC_VER__) && __CUDACC_VER__ >= 70500 && __cplusplus > 199711L #define EIGEN_HAS_CONSTEXPR 1 #endif #elif (defined(__cplusplus) && __cplusplus >= 201402L) || \ From a64156cae5e4fc72a783a14584f1140c5a68a3b3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 16 Nov 2015 13:33:54 +0100 Subject: [PATCH 231/344] Workaround i387 issue in unit test --- test/ref.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/ref.cpp b/test/ref.cpp index 1341dfef7..769db0414 100644 --- a/test/ref.cpp +++ b/test/ref.cpp @@ -18,6 +18,18 @@ // test Ref.h +// Deal with i387 extended precision +#if EIGEN_ARCH_i386 && !(EIGEN_ARCH_x86_64) + +#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(4,4) +#pragma GCC optimize ("-ffloat-store") +#else +#undef VERIFY_IS_EQUAL +#define VERIFY_IS_EQUAL(X,Y) VERIFY_IS_APPROX(X,Y) +#endif + +#endif + template void ref_matrix(const MatrixType& m) { typedef typename MatrixType::Index Index; @@ -55,7 +67,6 @@ template void ref_matrix(const MatrixType& m) rm2 = m2.block(i,j,brows,bcols); VERIFY_IS_EQUAL(m1, m2); - ConstRefDynMat rm3 = m1.block(i,j,brows,bcols); m1.block(i,j,brows,bcols) *= 2; m2.block(i,j,brows,bcols) *= 2; From 4926251f130faca49ffc743e88e397eb3e9db9c5 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Wed, 18 Nov 2015 10:55:23 -0500 Subject: [PATCH 232/344] bug #1115: enable static alignment on ARM outside of old-GCC --- Eigen/src/Core/util/Macros.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index bc81c1e82..8def69610 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -612,10 +612,14 @@ namespace Eigen { // 16 byte alignment on all platforms where 
vectorization might be enabled. In theory we could always // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in // certain common platform (compiler+architecture combinations) to avoid these problems. - // Only static alignment is really problematic (relies on nonstandard compiler extensions that don't - // work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even - // when we have to disable static alignment. - #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) + // Only static alignment is really problematic (relies on nonstandard compiler extensions), + // try to keep heap alignment even when we have to disable static alignment. + #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 + #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6) + // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support. + // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use. + // 4.8 and newer seem definitely unaffected. #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 #else #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 From 1dd444ea71d30cc3a1eab7af0ba3f6a0357ae93c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 18 Nov 2015 11:37:58 -0800 Subject: [PATCH 233/344] Avoid using the version of TensorIntDiv optimized for 32-bit integers when the divisor can be equal to one since it isn't supported. 
--- .../Eigen/CXX11/src/Tensor/TensorIntDiv.h | 9 ++++--- unsupported/test/cxx11_tensor_intdiv.cpp | 27 ++++++++++++++++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index fd2441894..058fb2c42 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -116,7 +116,7 @@ namespace { } -template +template struct TensorIntDivisor { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { @@ -166,8 +166,9 @@ struct TensorIntDivisor { // Optimized version for signed 32 bit integers. // Derived from Hacker's Delight. +// Only works for divisors strictly greater than one template <> -class TensorIntDivisor { +class TensorIntDivisor { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { magic = 0; @@ -226,8 +227,8 @@ private: }; -template -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor& divisor) { +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor& divisor) { return divisor.divide(numerator); } diff --git a/unsupported/test/cxx11_tensor_intdiv.cpp b/unsupported/test/cxx11_tensor_intdiv.cpp index 343b37dbd..fd6d27ae1 100644 --- a/unsupported/test/cxx11_tensor_intdiv.cpp +++ b/unsupported/test/cxx11_tensor_intdiv.cpp @@ -14,8 +14,29 @@ void test_signed_32bit() { + // Divide by one + const Eigen::internal::TensorIntDivisor div(1); + + for (int32_t j = 0; j < 25000; ++j) { + const int32_t fast_div = j / div; + const int32_t slow_div = j / 1; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + + // Standard divide by 2 or more for (int32_t i = 2; i < 25000; ++i) { - const Eigen::internal::TensorIntDivisor div(i); + const Eigen::internal::TensorIntDivisor div(i); + + for (int32_t j = 0; j < 25000; ++j) { + const int32_t fast_div = j / div; + const int32_t slow_div = 
j / i; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + } + + // Optimized divide by 2 or more + for (int32_t i = 2; i < 25000; ++i) { + const Eigen::internal::TensorIntDivisor div(i); for (int32_t j = 0; j < 25000; ++j) { const int32_t fast_div = j / div; @@ -42,7 +63,7 @@ void test_unsigned_32bit() void test_signed_64bit() { - for (int64_t i = 2; i < 25000; ++i) { + for (int64_t i = 1; i < 25000; ++i) { const Eigen::internal::TensorIntDivisor div(i); for (int64_t j = 0; j < 25000; ++j) { @@ -56,7 +77,7 @@ void test_signed_64bit() void test_unsigned_64bit() { - for (uint64_t i = 2; i < 25000; ++i) { + for (uint64_t i = 1; i < 25000; ++i) { const Eigen::internal::TensorIntDivisor div(i); for (uint64_t j = 0; j < 25000; ++j) { From 199499910525b41f33be6034be164292d7d9d853 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 18 Nov 2015 23:29:07 +0100 Subject: [PATCH 234/344] Add regression unit test for prod.maxCoeff(i) --- test/visitor.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/visitor.cpp b/test/visitor.cpp index 39a5d6b5f..844170ec6 100644 --- a/test/visitor.cpp +++ b/test/visitor.cpp @@ -55,6 +55,11 @@ template void matrixVisitor(const MatrixType& p) VERIFY_IS_APPROX(maxc, eigen_maxc); VERIFY_IS_APPROX(minc, m.minCoeff()); VERIFY_IS_APPROX(maxc, m.maxCoeff()); + + eigen_maxc = (m.adjoint()*m).maxCoeff(&eigen_maxrow,&eigen_maxcol); + eigen_maxc = (m.adjoint()*m).eval().maxCoeff(&maxrow,&maxcol); + VERIFY(maxrow == eigen_maxrow); + VERIFY(maxcol == eigen_maxcol); } template void vectorVisitor(const VectorType& w) From 7d1cedd0feef2088a7edfa23acb78401bad4a272 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 18 Nov 2015 17:17:44 -0800 Subject: [PATCH 235/344] Added numeric limits for unsigned integers --- Eigen/src/Core/util/Meta.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index a7e7555e9..e7fa2a1f1 100644 --- a/Eigen/src/Core/util/Meta.h +++ 
b/Eigen/src/Core/util/Meta.h @@ -163,6 +163,15 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static int (min)() { return INT_MIN; } }; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static unsigned int epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static unsigned int (max)() { return UINT_MAX; } + EIGEN_DEVICE_FUNC + static unsigned int (min)() { return 0; } +}; template<> struct numeric_limits { EIGEN_DEVICE_FUNC @@ -172,6 +181,15 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static long (min)() { return LONG_MIN; } }; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static unsigned long epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static unsigned long (max)() { return ULONG_MAX; } + EIGEN_DEVICE_FUNC + static unsigned long (min)() { return 0; } +}; template<> struct numeric_limits { EIGEN_DEVICE_FUNC @@ -181,6 +199,15 @@ template<> struct numeric_limits EIGEN_DEVICE_FUNC static long long (min)() { return LLONG_MIN; } }; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static unsigned long long epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static unsigned long long (max)() { return ULLONG_MAX; } + EIGEN_DEVICE_FUNC + static unsigned long long (min)() { return 0; } +}; } From f8df393165a37cadc3314ecceed43715ad09f52e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 19 Nov 2015 13:57:27 -0800 Subject: [PATCH 236/344] Added support for 128bit integers on CUDA devices. 
--- unsupported/Eigen/CXX11/Tensor | 1 + .../Eigen/CXX11/src/Tensor/TensorUInt128.h | 233 ++++++++++++++++++ unsupported/test/CMakeLists.txt | 1 + unsupported/test/cxx11_tensor_uint128.cpp | 144 +++++++++++ 4 files changed, 379 insertions(+) create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h create mode 100644 unsupported/test/cxx11_tensor_uint128.cpp diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 1e3d2c06a..7e59af964 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -67,6 +67,7 @@ #include "src/Tensor/TensorInitializer.h" #include "src/Tensor/TensorTraits.h" #include "src/Tensor/TensorFunctors.h" +#include "src/Tensor/TensorUInt128.h" #include "src/Tensor/TensorIntDiv.h" #include "src/Tensor/TensorBase.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h new file mode 100644 index 000000000..2b0808629 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h @@ -0,0 +1,233 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H +#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H + +namespace Eigen { +namespace internal { + + +template +struct static_val { + static const uint64_t value = n; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) { + eigen_assert(v == n); + } +}; + + +template +struct TensorUInt128 +{ + HIGH high; + LOW low; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(int x) : high(0), low(x) { + eigen_assert(x >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(int64_t x) : high(0), low(x) { + eigen_assert(x >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(uint64_t x) : high(0), low(x) { } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + TensorUInt128(uint64_t y, uint64_t x) : high(y), low(x) { } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const { + return low; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const { + return low; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const { + return high; + } +}; + + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator == (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + return (lhs.high == rhs.high) & (lhs.low == rhs.low); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator != (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + return (lhs.high != rhs.high) | (lhs.low != rhs.low); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator >= (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + if (lhs.high != rhs.high) { + return lhs.high > rhs.high; + } + return lhs.low >= rhs.low; +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static bool operator < (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + if (lhs.high != rhs.high) { + return lhs.high < rhs.high; + } + return 
lhs.low < rhs.low; +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static TensorUInt128 operator + (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + TensorUInt128 result(lhs.high + rhs.high, lhs.low + rhs.low); + if (result.low < rhs.low) { + result.high += 1; + } + return result; +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +static TensorUInt128 operator - (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + TensorUInt128 result(lhs.high - rhs.high, lhs.low - rhs.low); + if (result.low > lhs.low) { + result.high -= 1; + } + return result; +} + + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +static TensorUInt128 operator * (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + // Split each 128-bit integer into 4 32-bit integers, and then do the + // multiplications by hand as follow: + // lhs a b c d + // rhs e f g h + // ----------- + // ah bh ch dh + // bg cg dg + // cf df + // de + // The result is stored in 2 64bit integers, high and low. + + static const uint64_t LOW = 0x00000000FFFFFFFFLL; + static const uint64_t HIGH = 0xFFFFFFFF00000000LL; + + uint64_t d = lhs.low & LOW; + uint64_t c = (lhs.low & HIGH) >> 32LL; + uint64_t b = lhs.high & LOW; + uint64_t a = (lhs.high & HIGH) >> 32LL; + + uint64_t h = rhs.low & LOW; + uint64_t g = (rhs.low & HIGH) >> 32LL; + uint64_t f = rhs.high & LOW; + uint64_t e = (rhs.high & HIGH) >> 32LL; + + // Compute the low 32 bits of low + uint64_t acc = d * h; + uint64_t low = acc & LOW; + // Compute the high 32 bits of low. 
Add a carry every time we wrap around + acc >>= 32LL; + uint64_t carry = 0; + uint64_t acc2 = acc + c * h; + if (acc2 < acc) { + carry++; + } + acc = acc2 + d * g; + if (acc < acc2) { + carry++; + } + low |= (acc << 32LL); + + // Carry forward the high bits of acc to initiate the computation of the + // low 32 bits of high + acc2 = (acc >> 32LL) | (carry << 32LL); + carry = 0; + + acc = acc2 + b * h; + if (acc < acc2) { + carry++; + } + acc2 = acc + c * g; + if (acc2 < acc) { + carry++; + } + acc = acc2 + d * f; + if (acc < acc2) { + carry++; + } + uint64_t high = acc & LOW; + + // Start to compute the high 32 bits of high. + acc2 = (acc >> 32LL) | (carry << 32LL); + + acc = acc2 + a * h; + acc2 = acc + b * g; + acc = acc2 + c * f; + acc2 = acc + d * e; + high |= (acc2 << 32LL); + + return TensorUInt128(high, low); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +static TensorUInt128 operator / (const TensorUInt128& lhs, const TensorUInt128& rhs) +{ + if (rhs == TensorUInt128, static_val<1> >(1)) { + return TensorUInt128(lhs.high, lhs.low); + } else if (lhs < rhs) { + return TensorUInt128(0); + } else { + // calculate the biggest power of 2 times rhs that's less than or equal to lhs + TensorUInt128 power2(1); + TensorUInt128 d(rhs); + TensorUInt128 tmp(lhs - d); + while (lhs >= d) { + tmp = tmp - d; + d = d + d; + power2 = power2 + power2; + } + + tmp = TensorUInt128(lhs.high, lhs.low); + TensorUInt128 result(0); + while (power2 != TensorUInt128, static_val<0> >(0)) { + if (tmp >= d) { + tmp = tmp - d; + result = result + power2; + } + // Shift right + power2 = TensorUInt128(power2.high >> 1, (power2.low >> 1) | (power2.high << 63)); + d = TensorUInt128(d.high >> 1, (d.low >> 1) | (d.high << 63)); + } + + return result; + } +} + + +} // namespace internal +} // namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index cc4ce1c59..97257b183 100644 --- 
a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -117,6 +117,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_of_const_values "-std=c++0x") ei_add_test(cxx11_tensor_of_complex "-std=c++0x") ei_add_test(cxx11_tensor_of_strings "-std=c++0x") + ei_add_test(cxx11_tensor_uint128 "-std=c++0x") ei_add_test(cxx11_tensor_intdiv "-std=c++0x") ei_add_test(cxx11_tensor_lvalue "-std=c++0x") ei_add_test(cxx11_tensor_map "-std=c++0x") diff --git a/unsupported/test/cxx11_tensor_uint128.cpp b/unsupported/test/cxx11_tensor_uint128.cpp new file mode 100644 index 000000000..38fad192b --- /dev/null +++ b/unsupported/test/cxx11_tensor_uint128.cpp @@ -0,0 +1,144 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +#include + +using Eigen::internal::TensorUInt128; +using Eigen::internal::static_val; + +static void VERIFY_EQUAL(TensorUInt128 actual, __uint128_t expected) { + bool matchl = actual.lower() == static_cast(expected); + bool matchh = actual.upper() == static_cast(expected >> 64); + if (!matchl || !matchh) { + const char* testname = g_test_stack.back().c_str(); + std::cerr << "Test " << testname << " failed in " << __FILE__ + << " (" << __LINE__ << ")" + << std::endl; + abort(); + } +} + + +static void test_add() { + uint64_t incr = internal::random(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128 i(i1, i2); + __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128 j(j1, j2); + __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2); + TensorUInt128 actual = i + j; + __uint128_t expected = a + b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +static void test_sub() { + uint64_t incr = internal::random(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128 i(i1, i2); + __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128 j(j1, j2); + __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2); + TensorUInt128 actual = i - j; + __uint128_t expected = a - b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +static void test_mul() { + uint64_t incr = internal::random(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128 i(i1, i2); + __uint128_t a = 
(static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128 j(j1, j2); + __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2); + TensorUInt128 actual = i * j; + __uint128_t expected = a * b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +static void test_div() { + uint64_t incr = internal::random(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128 i(i1, i2); + __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128 j(j1, j2); + __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2); + TensorUInt128 actual = i / j; + __uint128_t expected = a / b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +static void test_misc1() { + uint64_t incr = internal::random(1, 9999999999); + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128, uint64_t> i(0, i2); + __uint128_t a = static_cast<__uint128_t>(i2); + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128, uint64_t> j(0, j2); + __uint128_t b = static_cast<__uint128_t>(j2); + uint64_t actual = (i * j).upper(); + uint64_t expected = (a * b) >> 64; + VERIFY_IS_EQUAL(actual, expected); + } + } +} + +static void test_misc2() { + int64_t incr = internal::random(1, 100); + for (int64_t log_div = 0; log_div < 63; ++log_div) { + for (int64_t divider = 1; divider <= 1000000 * incr; divider += incr) { + uint64_t expected = (static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1; + uint64_t shift = 1ULL << log_div; + + TensorUInt128 result = (TensorUInt128 >(shift, 0) / TensorUInt128, uint64_t>(divider) - TensorUInt128, 
static_val<0> >(1, 0) + TensorUInt128, static_val<1> >(1)); + uint64_t actual = static_cast(result); + VERIFY_EQUAL(actual, expected); + } + } +} + + +void test_cxx11_tensor_uint128() +{ + CALL_SUBTEST(test_add()); + CALL_SUBTEST(test_sub()); + CALL_SUBTEST(test_mul()); + CALL_SUBTEST(test_div()); + CALL_SUBTEST(test_misc1()); + CALL_SUBTEST(test_misc2()); +} From e2859c6b7159e8eee1326507c2767aea9fe1822a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 19 Nov 2015 14:07:50 -0800 Subject: [PATCH 237/344] Cleanup the integer division test --- unsupported/test/cxx11_tensor_intdiv.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/unsupported/test/cxx11_tensor_intdiv.cpp b/unsupported/test/cxx11_tensor_intdiv.cpp index fd6d27ae1..c6e1890df 100644 --- a/unsupported/test/cxx11_tensor_intdiv.cpp +++ b/unsupported/test/cxx11_tensor_intdiv.cpp @@ -116,8 +116,7 @@ void test_powers_64bit() { if (start_num < 0) start_num = 0; for (int64_t num = start_num; num < end_num; num++) { - Eigen::internal::TensorIntDivisor divider = - Eigen::internal::TensorIntDivisor(div); + Eigen::internal::TensorIntDivisor divider(div); int64_t result = num/div; int64_t result_op = divider.divide(num); VERIFY_IS_EQUAL(result_op, result); @@ -130,8 +129,7 @@ void test_specific() { // A particular combination that was previously failing int64_t div = 209715200; int64_t num = 3238002688; - Eigen::internal::TensorIntDivisor divider = - Eigen::internal::TensorIntDivisor(div); + Eigen::internal::TensorIntDivisor divider(div); int64_t result = num/div; int64_t result_op = divider.divide(num); VERIFY_IS_EQUAL(result, result_op); From 04f1284f9a9ba38c3b23ccbd1168b874e5edf33a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 19 Nov 2015 14:08:08 -0800 Subject: [PATCH 238/344] Shard the uint128 test --- unsupported/test/cxx11_tensor_uint128.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/unsupported/test/cxx11_tensor_uint128.cpp 
b/unsupported/test/cxx11_tensor_uint128.cpp index 38fad192b..53c09ff0b 100644 --- a/unsupported/test/cxx11_tensor_uint128.cpp +++ b/unsupported/test/cxx11_tensor_uint128.cpp @@ -135,10 +135,10 @@ static void test_misc2() { void test_cxx11_tensor_uint128() { - CALL_SUBTEST(test_add()); - CALL_SUBTEST(test_sub()); - CALL_SUBTEST(test_mul()); - CALL_SUBTEST(test_div()); - CALL_SUBTEST(test_misc1()); - CALL_SUBTEST(test_misc2()); + CALL_SUBTEST_1(test_add()); + CALL_SUBTEST_2(test_sub()); + CALL_SUBTEST_3(test_mul()); + CALL_SUBTEST_4(test_div()); + CALL_SUBTEST_5(test_misc1()); + CALL_SUBTEST_6(test_misc2()); } From f37a5f1c539a7545579b06489c16df42005bd819 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 19 Nov 2015 14:34:26 -0800 Subject: [PATCH 239/344] Fixed compilation error triggered by nvcc --- unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h index 2b0808629..f5cca0ad7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h @@ -130,8 +130,8 @@ static TensorUInt128 operator * (const TensorUInt128 // de // The result is stored in 2 64bit integers, high and low. 
- static const uint64_t LOW = 0x00000000FFFFFFFFLL; - static const uint64_t HIGH = 0xFFFFFFFF00000000LL; + const uint64_t LOW = 0x00000000FFFFFFFFLL; + const uint64_t HIGH = 0xFFFFFFFF00000000LL; uint64_t d = lhs.low & LOW; uint64_t c = (lhs.low & HIGH) >> 32LL; From 66ff9b2c6cdb6b0b105686556e6ef4a87bb9269f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 19 Nov 2015 15:40:32 -0800 Subject: [PATCH 240/344] Fixed compilation warning generated by clang --- unsupported/test/cxx11_tensor_uint128.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/unsupported/test/cxx11_tensor_uint128.cpp b/unsupported/test/cxx11_tensor_uint128.cpp index 53c09ff0b..ee3767e58 100644 --- a/unsupported/test/cxx11_tensor_uint128.cpp +++ b/unsupported/test/cxx11_tensor_uint128.cpp @@ -14,7 +14,7 @@ using Eigen::internal::TensorUInt128; using Eigen::internal::static_val; -static void VERIFY_EQUAL(TensorUInt128 actual, __uint128_t expected) { +void VERIFY_EQUAL(TensorUInt128 actual, __uint128_t expected) { bool matchl = actual.lower() == static_cast(expected); bool matchh = actual.upper() == static_cast(expected >> 64); if (!matchl || !matchh) { @@ -27,7 +27,7 @@ static void VERIFY_EQUAL(TensorUInt128 actual, __uint128_t e } -static void test_add() { +void test_add() { uint64_t incr = internal::random(1, 9999999999); for (uint64_t i1 = 0; i1 < 100; ++i1) { for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { @@ -46,7 +46,7 @@ static void test_add() { } } -static void test_sub() { +void test_sub() { uint64_t incr = internal::random(1, 9999999999); for (uint64_t i1 = 0; i1 < 100; ++i1) { for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { @@ -65,7 +65,7 @@ static void test_sub() { } } -static void test_mul() { +void test_mul() { uint64_t incr = internal::random(1, 9999999999); for (uint64_t i1 = 0; i1 < 100; ++i1) { for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { @@ -84,7 +84,7 @@ static void test_mul() { } } -static void test_div() { +void 
test_div() { uint64_t incr = internal::random(1, 9999999999); for (uint64_t i1 = 0; i1 < 100; ++i1) { for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { @@ -103,7 +103,7 @@ static void test_div() { } } -static void test_misc1() { +void test_misc1() { uint64_t incr = internal::random(1, 9999999999); for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { TensorUInt128, uint64_t> i(0, i2); @@ -118,7 +118,7 @@ static void test_misc1() { } } -static void test_misc2() { +void test_misc2() { int64_t incr = internal::random(1, 100); for (int64_t log_div = 0; log_div < 63; ++log_div) { for (int64_t divider = 1; divider <= 1000000 * incr; divider += incr) { From 0ad7c7b1adc142fec4376ccdfdb4e88e4820934d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 19 Nov 2015 15:52:51 -0800 Subject: [PATCH 241/344] Fixed another clang compilation warning --- unsupported/test/cxx11_tensor_intdiv.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/test/cxx11_tensor_intdiv.cpp b/unsupported/test/cxx11_tensor_intdiv.cpp index c6e1890df..48aa6d368 100644 --- a/unsupported/test/cxx11_tensor_intdiv.cpp +++ b/unsupported/test/cxx11_tensor_intdiv.cpp @@ -15,10 +15,10 @@ void test_signed_32bit() { // Divide by one - const Eigen::internal::TensorIntDivisor div(1); + const Eigen::internal::TensorIntDivisor div_by_one(1); for (int32_t j = 0; j < 25000; ++j) { - const int32_t fast_div = j / div; + const int32_t fast_div = j / div_by_one; const int32_t slow_div = j / 1; VERIFY_IS_EQUAL(fast_div, slow_div); } From e52d4f8d8d11137b5e8574c1fcaf9aae0c865128 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 20 Nov 2015 13:54:28 +0100 Subject: [PATCH 242/344] Add is_integral<> type traits --- Eigen/src/Core/util/Meta.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index e7fa2a1f1..15b80abd9 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -73,6 +73,18 @@ 
template<> struct is_arithmetic { enum { value = true }; }; template<> struct is_arithmetic { enum { value = true }; }; template<> struct is_arithmetic { enum { value = true }; }; +template struct is_integral { enum { value = false }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; + template struct add_const { typedef const T type; }; template struct add_const { typedef T& type; }; From e1b27bcb0bc9383eaaa93a98b635a1d880eb8b19 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 20 Nov 2015 13:55:34 +0100 Subject: [PATCH 243/344] Workaround MSVC missing overloads of std::fpclassify for integral types --- Eigen/src/Core/MathFunctions.h | 226 ++++++++++++++++++--------------- 1 file changed, 125 insertions(+), 101 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 2ac6f4c67..5187b9c90 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -675,6 +675,128 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); } +// Implementatin of is* functions + +// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. 
+#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) +#define EIGEN_USE_STD_FPCLASSIFY 1 +#else +#define EIGEN_USE_STD_FPCLASSIFY 0 +#endif + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isnan_impl(const T &x) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isinf_impl(const T &x) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isfinite_impl(const T &x) { return true; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isfinite_impl(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isfinite; + return isfinite EIGEN_NOT_A_MACRO (x); + #else + return x::highest() && x>NumTraits::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isinf_impl(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isinf; + return isinf EIGEN_NOT_A_MACRO (x); + #else + return x>NumTraits::highest() || x::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isnan_impl(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isnan; + return isnan EIGEN_NOT_A_MACRO (x); + #else + return x != x; + #endif +} + +#if (!EIGEN_USE_STD_FPCLASSIFY) + +#if EIGEN_COMP_MSVC + +template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) +{ + return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; +} + +//MSVC defines a _isnan builtin function, but for double only +template<> EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); } +template<> EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); } + +template<> EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } 
+template<> EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } +template<> EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); } + +#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) + +#if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) +#else + // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), + // while the second prevent too aggressive optimizations in fast-math mode: + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only"))) +#endif + +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); } + +#undef EIGEN_TMP_NOOPT_ATTRIB + +#endif + +#endif + +template +bool isfinite_impl(const std::complex& x) +{ + return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); +} + +template +bool isnan_impl(const std::complex& x) +{ + return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); +} + +template +bool isinf_impl(const std::complex& x) +{ + return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); +} + } // end namespace internal /**************************************************************************** @@ -818,107 +940,9 @@ 
inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); } -// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. -#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) -#define EIGEN_USE_STD_FPCLASSIFY 1 -#else -#define EIGEN_USE_STD_FPCLASSIFY 0 -#endif - -template -EIGEN_DEVICE_FUNC -bool (isfinite)(const T& x) -{ - #if EIGEN_USE_STD_FPCLASSIFY - using std::isfinite; - return isfinite EIGEN_NOT_A_MACRO (x); - #else - return x::highest() && x>NumTraits::lowest(); - #endif -} - -template -EIGEN_DEVICE_FUNC -bool (isinf)(const T& x) -{ - #if EIGEN_USE_STD_FPCLASSIFY - using std::isinf; - return isinf EIGEN_NOT_A_MACRO (x); - #else - return x>NumTraits::highest() || x::lowest(); - #endif -} - -template -EIGEN_DEVICE_FUNC -bool (isnan)(const T& x) -{ - #if EIGEN_USE_STD_FPCLASSIFY - using std::isnan; - return isnan EIGEN_NOT_A_MACRO (x); - #else - return x != x; - #endif -} - -#if (!EIGEN_USE_STD_FPCLASSIFY) - -#if EIGEN_COMP_MSVC - -template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) -{ - return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; -} - -//MSVC defines a _isnan builtin function, but for double only -template<> EIGEN_DEVICE_FUNC inline bool (isnan)(const long double& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC inline bool (isnan)(const double& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC inline bool (isnan)(const float& x) { return _isnan(x); } - -template<> EIGEN_DEVICE_FUNC inline bool (isinf)(const long double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC inline bool (isinf)(const double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC inline bool (isinf)(const float& x) { return isinf_msvc_helper(x); } - -#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && 
EIGEN_COMP_GNUC) - -#if EIGEN_GNUC_AT_LEAST(5,0) - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) -#else - // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), - // while the second prevent too aggressive optimizations in fast-math mode: - #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only"))) -#endif - -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const long double& x) { return __builtin_isnan(x); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const double& x) { return __builtin_isnan(x); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isnan)(const float& x) { return __builtin_isnan(x); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const double& x) { return __builtin_isinf(x); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const float& x) { return __builtin_isinf(x); } -template<> EIGEN_TMP_NOOPT_ATTRIB bool (isinf)(const long double& x) { return __builtin_isinf(x); } - -#undef EIGEN_TMP_NOOPT_ATTRIB - -#endif - -#endif - -template -bool (isfinite)(const std::complex& x) -{ - return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); -} - -template -bool (isnan)(const std::complex& x) -{ - return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); -} - -template -bool (isinf)(const std::complex& x) -{ - return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); -} +template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } +template EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } +template EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } template EIGEN_DEVICE_FUNC From 5c9c0dca4dcf6c62b5b63c529f18421df3d4f36e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 20 Nov 2015 14:51:36 
+0100 Subject: [PATCH 244/344] Add missing using statement to enable fast Array / real operations. (was ok for Matrix only) --- Eigen/src/Core/ArrayBase.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 66813c8ea..b4c24a27a 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -53,6 +53,7 @@ template class ArrayBase typedef DenseBase Base; using Base::operator*; + using Base::operator/; using Base::RowsAtCompileTime; using Base::ColsAtCompileTime; using Base::SizeAtCompileTime; From 4a985e793c6c6f822c18243f8c10508e4a8b635c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 20 Nov 2015 14:52:08 +0100 Subject: [PATCH 245/344] Workaround msvc broken complex/complex division in unit test --- test/array.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/array.cpp b/test/array.cpp index e2b20f9e9..d437105da 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -365,11 +365,15 @@ template void array_complex(const ArrayType& m) std::complex zero(0.0,0.0); VERIFY((Eigen::isnan)(m1*zero/zero).all()); +#if EIGEN_COMP_MSVC + // msvc complex division is not robust + VERIFY((Eigen::isinf)(m4/RealScalar(0)).all()); +#else #if EIGEN_COMP_CLANG - // clang's complex division is notoriously broken + // clang's complex division is notoriously broken too if((numext::isinf)(m4(0,0)/RealScalar(0))) { #endif - VERIFY((Eigen::isinf)(m4/zero).all()); + VERIFY((Eigen::isinf)(m4/zero).all()); #if EIGEN_COMP_CLANG } else @@ -377,6 +381,8 @@ template void array_complex(const ArrayType& m) VERIFY((Eigen::isinf)(m4.real()/zero.real()).all()); } #endif +#endif // MSVC + VERIFY(((Eigen::isfinite)(m1) && (!(Eigen::isfinite)(m1*zero/zero)) && (!(Eigen::isfinite)(m1/zero))).all()); VERIFY_IS_APPROX(inverse(inverse(m1)),m1); From 4fc36079e778a4944af27f1d71e41fa45bae197e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 20 Nov 2015 15:29:03 +0100 Subject: [PATCH 246/344] 
Fix overload instantiation for clang --- Eigen/src/Core/MathFunctions.h | 51 +++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 5187b9c90..fe1dafe83 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -687,21 +687,21 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() template EIGEN_DEVICE_FUNC typename internal::enable_if::value,bool>::type -isnan_impl(const T &x) { return false; } +isnan_impl(const T&) { return false; } template EIGEN_DEVICE_FUNC typename internal::enable_if::value,bool>::type -isinf_impl(const T &x) { return false; } +isinf_impl(const T&) { return false; } template EIGEN_DEVICE_FUNC typename internal::enable_if::value,bool>::type -isfinite_impl(const T &x) { return true; } +isfinite_impl(const T&) { return true; } template EIGEN_DEVICE_FUNC -typename internal::enable_if::value,bool>::type +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type isfinite_impl(const T& x) { #if EIGEN_USE_STD_FPCLASSIFY @@ -714,7 +714,7 @@ isfinite_impl(const T& x) template EIGEN_DEVICE_FUNC -typename internal::enable_if::value,bool>::type +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type isinf_impl(const T& x) { #if EIGEN_USE_STD_FPCLASSIFY @@ -727,7 +727,7 @@ isinf_impl(const T& x) template EIGEN_DEVICE_FUNC -typename internal::enable_if::value,bool>::type +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type isnan_impl(const T& x) { #if EIGEN_USE_STD_FPCLASSIFY @@ -779,23 +779,10 @@ template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return #endif -template -bool isfinite_impl(const std::complex& x) -{ - return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); -} - -template -bool isnan_impl(const std::complex& x) -{ - return 
(numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); -} - -template -bool isinf_impl(const std::complex& x) -{ - return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); -} +// The following overload are defined at the end of this file +template bool isfinite_impl(const std::complex& x); +template bool isnan_impl(const std::complex& x); +template bool isinf_impl(const std::complex& x); } // end namespace internal @@ -986,6 +973,24 @@ inline int log2(int x) namespace internal { +template +bool isfinite_impl(const std::complex& x) +{ + return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); +} + +template +bool isnan_impl(const std::complex& x) +{ + return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); +} + +template +bool isinf_impl(const std::complex& x) +{ + return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); +} + /**************************************************************************** * Implementation of fuzzy comparisons * ****************************************************************************/ From 4522ffd17c9c6cb74049263ea9de0f8a9d35e2b2 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 20 Nov 2015 15:29:32 +0100 Subject: [PATCH 247/344] Add regression using test for array/real --- test/linearstructure.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp index 3c7cdbe41..292f33969 100644 --- a/test/linearstructure.cpp +++ b/test/linearstructure.cpp @@ -108,9 +108,11 @@ void test_linearstructure() CALL_SUBTEST_7( linearStructure(MatrixXi (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_8( linearStructure(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); CALL_SUBTEST_9( linearStructure(ArrayXXf 
(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_10( linearStructure(ArrayXXcf (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_10( real_complex() ); - CALL_SUBTEST_10( real_complex(10,10) ); + CALL_SUBTEST_11( real_complex() ); + CALL_SUBTEST_11( real_complex(10,10) ); + CALL_SUBTEST_11( real_complex(10,10) ); } #ifdef EIGEN_TEST_PART_4 From 027a846b3404339ec223daa3178e46a0c676e5ad Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 20 Nov 2015 15:30:10 +0100 Subject: [PATCH 248/344] Use .data() instead of &coeffRef(0). --- Eigen/src/SuperLUSupport/SuperLUSupport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index 7c644eef6..afb5904e0 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -659,7 +659,7 @@ void SuperLU::_solve_impl(const MatrixBase &b, MatrixBase &m_sluStat, &info, Scalar()); StatFree(&m_sluStat); - if(&x.coeffRef(0) != x_ref.data()) + if(x.derived().data() != x_ref.data()) x = x_ref; m_info = info==0 ? Success : NumericalIssue; From 4946d758c932ea218046371cb94b15451107b957 Mon Sep 17 00:00:00 2001 From: Chris Jones Date: Fri, 20 Nov 2015 19:58:08 +0100 Subject: [PATCH 249/344] Use a class constructor to initialize CPU cache sizes Using a static instance of a class to initialize the values for the CPU cache sizes guarantees thread-safe initialization of the values when using C++11. Therefore under C++11 it is no longer necessary to call Eigen::initParallel() before calling any eigen functions on different threads. 
--- .../Core/products/GeneralBlockPanelKernel.h | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 79eaa7432..94754bf66 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -36,37 +36,40 @@ const std::ptrdiff_t defaultL3CacheSize = 512*1024; #endif /** \internal */ -inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) -{ - static bool m_cache_sizes_initialized = false; - static std::ptrdiff_t m_l1CacheSize = 0; - static std::ptrdiff_t m_l2CacheSize = 0; - static std::ptrdiff_t m_l3CacheSize = 0; - - if(!m_cache_sizes_initialized) - { +struct CacheSizes { + CacheSizes(): m_l1(-1),m_l2(-1),m_l3(-1) { int l1CacheSize, l2CacheSize, l3CacheSize; queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize); - m_l1CacheSize = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize); - m_l2CacheSize = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize); - m_l3CacheSize = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize); - m_cache_sizes_initialized = true; + m_l1 = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize); + m_l2 = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize); + m_l3 = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize); } + std::ptrdiff_t m_l1; + std::ptrdiff_t m_l2; + std::ptrdiff_t m_l3; +}; + + +/** \internal */ +inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3) +{ + static CacheSizes m_cacheSizes; + if(action==SetAction) { // set the cpu cache size and cache all block sizes from a global cache size in byte eigen_internal_assert(l1!=0 && l2!=0); - m_l1CacheSize = *l1; - m_l2CacheSize = *l2; - m_l3CacheSize = *l3; + m_cacheSizes.m_l1 = *l1; + m_cacheSizes.m_l2 = *l2; + m_cacheSizes.m_l3 
= *l3; } else if(action==GetAction) { eigen_internal_assert(l1!=0 && l2!=0); - *l1 = m_l1CacheSize; - *l2 = m_l2CacheSize; - *l3 = m_l3CacheSize; + *l1 = m_cacheSizes.m_l1; + *l2 = m_cacheSizes.m_l2; + *l3 = m_cacheSizes.m_l3; } else { From 383d1cc2ed76d1757a45cd0b2d6559dee7e2ee1b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 20 Nov 2015 11:09:46 -0800 Subject: [PATCH 250/344] Added proper support for fast 64bit integer division on CUDA --- .../Eigen/CXX11/src/Tensor/TensorIntDiv.h | 33 ++++++++----------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index 058fb2c42..81c661269 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -61,13 +61,8 @@ namespace { template struct DividerTraits { -#if defined(__SIZEOF_INT128__) && !defined(__CUDACC__) typedef typename UnsignedTraits::type type; static const int N = sizeof(T) * 8; -#else - typedef uint32_t type; - static const int N = 32; -#endif }; template @@ -79,40 +74,38 @@ namespace { #endif } -#if defined(__CUDA_ARCH__) - template - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { - return __umul64hi(a, b); - } -#else template - EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { -#if defined(__SIZEOF_INT128__) && !defined(__CUDACC__) + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { +#if defined(__CUDA_ARCH__) + return __umul64hi(a, b); +#elif defined(__SIZEOF_INT128__) __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); return static_cast(v >> 64); #else - EIGEN_STATIC_ASSERT(sizeof(T) == 4, YOU_MADE_A_PROGRAMMING_MISTAKE); - return (a * b) >> 32; + return (TensorUInt128, uint64_t>(a) * TensorUInt128, uint64_t>(b)).upper(); #endif } -#endif template struct DividerHelper { - static EIGEN_DEVICE_FUNC 
EIGEN_ALWAYS_INLINE uint32_t computeMultiplier (const int log_div, const T divider) { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) { EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE); return static_cast((static_cast(1) << (N+log_div)) / divider - (static_cast(1) << N) + 1); } }; -#if defined(__SIZEOF_INT128__) && !defined(__CUDACC__) template struct DividerHelper<64, T> { - static EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { +#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) return static_cast((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); +#else + const uint64_t shift = 1ULL << log_div; + TensorUInt128 result = (TensorUInt128 >(shift, 0) / TensorUInt128, uint64_t>(divider) - TensorUInt128, static_val<0> >(1, 0) + TensorUInt128, static_val<1> >(1)); + return static_cast(result); +#endif } }; -#endif } From a367804856cf3a39d9d43d10ec3ba2e335a8ec3a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 20 Nov 2015 12:41:40 -0800 Subject: [PATCH 251/344] Added option to force the usage of the Eigen array class instead of the std::array class. --- unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h index 9a239ef6d..ab9c2ec3e 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateArray.h @@ -15,7 +15,7 @@ // The array class is only available starting with cxx11. Emulate our own here // if needed. // Moreover, CUDA doesn't support the STL containers, so we use our own instead. 
-#if __cplusplus <= 199711L || defined(__CUDACC__) +#if __cplusplus <= 199711L || defined(__CUDACC__) || defined(EIGEN_AVOID_STL_ARRAY) namespace Eigen { template class array { From 9fa65d38383e203ba968de819697941096c86f03 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 20 Nov 2015 17:42:50 -0800 Subject: [PATCH 252/344] Split TensorDeviceType.h in 3 files to make it more manageable --- unsupported/Eigen/CXX11/Tensor | 4 +- ...{TensorDeviceType.h => TensorDeviceCuda.h} | 266 +----------------- .../CXX11/src/Tensor/TensorDeviceDefault.h | 61 ++++ .../CXX11/src/Tensor/TensorDeviceThreadPool.h | 224 +++++++++++++++ 4 files changed, 290 insertions(+), 265 deletions(-) rename unsupported/Eigen/CXX11/src/Tensor/{TensorDeviceType.h => TensorDeviceCuda.h} (52%) create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 7e59af964..17c4325b8 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -60,7 +60,9 @@ #include "src/Tensor/TensorMacros.h" #include "src/Tensor/TensorForwardDeclarations.h" #include "src/Tensor/TensorMeta.h" -#include "src/Tensor/TensorDeviceType.h" +#include "src/Tensor/TensorDeviceDefault.h" +#include "src/Tensor/TensorDeviceThreadPool.h" +#include "src/Tensor/TensorDeviceCuda.h" #include "src/Tensor/TensorIndexList.h" #include "src/Tensor/TensorDimensionList.h" #include "src/Tensor/TensorDimensions.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h similarity index 52% rename from unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h rename to unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 71fd4a6af..7d80d0b91 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -7,272 +7,12 
@@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H -#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H +#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H namespace Eigen { -// Default device for the machine (typically a single cpu core) -struct DefaultDevice { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { - ::memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { - ::memset(buffer, c, n); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { -#ifndef __CUDA_ARCH__ - // Running on the host CPU - return 1; -#else - // Running on a CUDA device - return 32; -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { -#ifndef __CUDA_ARCH__ - // Running single threaded on the host CPU - // Should return an enum that encodes the ISA supported by the CPU - return 1; -#else - // Running on a CUDA device - return __CUDA_ARCH__ / 100; -#endif - } -}; - - -// Multiple cpu cores -// We should really use a thread pool here but first we need to find a portable thread pool library. 
-#ifdef EIGEN_USE_THREADS - -// This defines an interface that ThreadPoolDevice can take to use -// custom thread pools underneath. -class ThreadPoolInterface { - public: - virtual void Schedule(std::function fn) = 0; - - virtual ~ThreadPoolInterface() {} -}; - -// The implementation of the ThreadPool type ensures that the Schedule method -// runs the functions it is provided in FIFO order when the scheduling is done -// by a single thread. -class ThreadPool : public ThreadPoolInterface { - public: - // Construct a pool that contains "num_threads" threads. - explicit ThreadPool(int num_threads) { - for (int i = 0; i < num_threads; i++) { - threads_.push_back(new std::thread([this]() { WorkerLoop(); })); - } - } - - // Wait until all scheduled work has finished and then destroy the - // set of threads. - ~ThreadPool() - { - { - // Wait for all work to get done. - std::unique_lock l(mu_); - empty_.wait(l, [this]() { return pending_.empty(); }); - exiting_ = true; - - // Wakeup all waiters. - for (auto w : waiters_) { - w->ready = true; - w->work = nullptr; - w->cv.notify_one(); - } - } - - // Wait for threads to finish. - for (auto t : threads_) { - t->join(); - delete t; - } - } - - // Schedule fn() for execution in the pool of threads. The functions are - // executed in the order in which they are scheduled. 
- void Schedule(std::function fn) { - std::unique_lock l(mu_); - if (waiters_.empty()) { - pending_.push_back(fn); - } else { - Waiter* w = waiters_.back(); - waiters_.pop_back(); - w->ready = true; - w->work = fn; - w->cv.notify_one(); - } - } - - protected: - void WorkerLoop() { - std::unique_lock l(mu_); - Waiter w; - while (!exiting_) { - std::function fn; - if (pending_.empty()) { - // Wait for work to be assigned to me - w.ready = false; - waiters_.push_back(&w); - w.cv.wait(l, [&w]() { return w.ready; }); - fn = w.work; - w.work = nullptr; - } else { - // Pick up pending work - fn = pending_.front(); - pending_.pop_front(); - if (pending_.empty()) { - empty_.notify_all(); - } - } - if (fn) { - mu_.unlock(); - fn(); - mu_.lock(); - } - } - } - - private: - struct Waiter { - std::condition_variable cv; - std::function work; - bool ready; - }; - - std::mutex mu_; - std::vector threads_; // All threads - std::vector waiters_; // Stack of waiting threads. - std::deque> pending_; // Queue of pending work - std::condition_variable empty_; // Signaled on pending_.empty() - bool exiting_ = false; -}; - - -// Notification is an object that allows a user to to wait for another -// thread to signal a notification that an event has occurred. -// -// Multiple threads can wait on the same Notification object. -// but only one caller must call Notify() on the object. -class Notification { - public: - Notification() : notified_(false) {} - ~Notification() {} - - void Notify() { - std::unique_lock l(mu_); - eigen_assert(!notified_); - notified_ = true; - cv_.notify_all(); - } - - void WaitForNotification() { - std::unique_lock l(mu_); - cv_.wait(l, [this]() { return notified_; } ); - } - - private: - std::mutex mu_; - std::condition_variable cv_; - bool notified_; -}; - -// Runs an arbitrary function and then calls Notify() on the passed in -// Notification. -template struct FunctionWrapper -{ - static void run(Notification* n, Function f, Args... 
args) { - f(args...); - n->Notify(); - } -}; - -static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) { - if (n) { - n->WaitForNotification(); - } -} - - -// Build a thread pool device on top the an existing pool of threads. -struct ThreadPoolDevice { - // The ownership of the thread pool remains with the caller. - ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { } - - EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); - } - - EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); - } - - EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { - ::memcpy(dst, src, n); - } - EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - - EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { - ::memset(buffer, c, n); - } - - EIGEN_STRONG_INLINE size_t numThreads() const { - return num_threads_; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { - // Should return an enum that encodes the ISA supported by the CPU - return 1; - } - - template - EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const { - Notification* n = new Notification(); - std::function func = - std::bind(&FunctionWrapper::run, n, f, args...); - pool_->Schedule(func); - return n; - } - template - EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { - std::function func = std::bind(f, args...); - pool_->Schedule(func); - } - - private: - ThreadPoolInterface* pool_; - size_t num_threads_; -}; - -#endif - - -// GPU offloading -#ifdef EIGEN_USE_GPU - // This defines an interface that GPUDevice can take to use // CUDA streams underneath. 
class StreamInterface { @@ -515,8 +255,6 @@ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { } #endif -#endif - } // end namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h new file mode 100644 index 000000000..267f6f8e3 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h @@ -0,0 +1,61 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H + + +namespace Eigen { + +// Default device for the machine (typically a single cpu core) +struct DefaultDevice { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { +#ifndef __CUDA_ARCH__ + // Running on the host CPU + return 1; +#else + // Running on a CUDA device + return 32; 
+#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { +#ifndef __CUDA_ARCH__ + // Running single threaded on the host CPU + // Should return an enum that encodes the ISA supported by the CPU + return 1; +#else + // Running on a CUDA device + return __CUDA_ARCH__ / 100; +#endif + } +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h new file mode 100644 index 000000000..dcbef5b03 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -0,0 +1,224 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H + +namespace Eigen { + +// This defines an interface that ThreadPoolDevice can take to use +// custom thread pools underneath. +class ThreadPoolInterface { + public: + virtual void Schedule(std::function fn) = 0; + + virtual ~ThreadPoolInterface() {} +}; + +// The implementation of the ThreadPool type ensures that the Schedule method +// runs the functions it is provided in FIFO order when the scheduling is done +// by a single thread. +class ThreadPool : public ThreadPoolInterface { + public: + // Construct a pool that contains "num_threads" threads. + explicit ThreadPool(int num_threads) { + for (int i = 0; i < num_threads; i++) { + threads_.push_back(new std::thread([this]() { WorkerLoop(); })); + } + } + + // Wait until all scheduled work has finished and then destroy the + // set of threads. 
+ ~ThreadPool() + { + { + // Wait for all work to get done. + std::unique_lock l(mu_); + empty_.wait(l, [this]() { return pending_.empty(); }); + exiting_ = true; + + // Wakeup all waiters. + for (auto w : waiters_) { + w->ready = true; + w->work = nullptr; + w->cv.notify_one(); + } + } + + // Wait for threads to finish. + for (auto t : threads_) { + t->join(); + delete t; + } + } + + // Schedule fn() for execution in the pool of threads. The functions are + // executed in the order in which they are scheduled. + void Schedule(std::function fn) { + std::unique_lock l(mu_); + if (waiters_.empty()) { + pending_.push_back(fn); + } else { + Waiter* w = waiters_.back(); + waiters_.pop_back(); + w->ready = true; + w->work = fn; + w->cv.notify_one(); + } + } + + protected: + void WorkerLoop() { + std::unique_lock l(mu_); + Waiter w; + while (!exiting_) { + std::function fn; + if (pending_.empty()) { + // Wait for work to be assigned to me + w.ready = false; + waiters_.push_back(&w); + w.cv.wait(l, [&w]() { return w.ready; }); + fn = w.work; + w.work = nullptr; + } else { + // Pick up pending work + fn = pending_.front(); + pending_.pop_front(); + if (pending_.empty()) { + empty_.notify_all(); + } + } + if (fn) { + mu_.unlock(); + fn(); + mu_.lock(); + } + } + } + + private: + struct Waiter { + std::condition_variable cv; + std::function work; + bool ready; + }; + + std::mutex mu_; + std::vector threads_; // All threads + std::vector waiters_; // Stack of waiting threads. + std::deque> pending_; // Queue of pending work + std::condition_variable empty_; // Signaled on pending_.empty() + bool exiting_ = false; +}; + + +// Notification is an object that allows a user to to wait for another +// thread to signal a notification that an event has occurred. +// +// Multiple threads can wait on the same Notification object. +// but only one caller must call Notify() on the object. 
+class Notification { + public: + Notification() : notified_(false) {} + ~Notification() {} + + void Notify() { + std::unique_lock l(mu_); + eigen_assert(!notified_); + notified_ = true; + cv_.notify_all(); + } + + void WaitForNotification() { + std::unique_lock l(mu_); + cv_.wait(l, [this]() { return notified_; } ); + } + + private: + std::mutex mu_; + std::condition_variable cv_; + bool notified_; +}; + +// Runs an arbitrary function and then calls Notify() on the passed in +// Notification. +template struct FunctionWrapper +{ + static void run(Notification* n, Function f, Args... args) { + f(args...); + n->Notify(); + } +}; + +static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) { + if (n) { + n->WaitForNotification(); + } +} + + +// Build a thread pool device on top the an existing pool of threads. +struct ThreadPoolDevice { + // The ownership of the thread pool remains with the caller. + ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { } + + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + + EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + + EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_STRONG_INLINE size_t numThreads() const { + return num_threads_; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { + // Should return an enum that encodes the ISA supported by the CPU + return 1; + } + + template + EIGEN_STRONG_INLINE Notification* enqueue(Function&& 
f, Args&&... args) const { + Notification* n = new Notification(); + std::function func = + std::bind(&FunctionWrapper::run, n, f, args...); + pool_->Schedule(func); + return n; + } + template + EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { + std::function func = std::bind(f, args...); + pool_->Schedule(func); + } + + private: + ThreadPoolInterface* pool_; + size_t num_threads_; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H From b265979a70ba06f4d4e8ba737d5d16bfd5c27ea3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 21 Nov 2015 15:03:04 +0100 Subject: [PATCH 253/344] Make FullPivLU::solve use rank() instead of nonzeroPivots(). --- Eigen/src/LU/FullPivLU.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 07a87cbc6..498df8adc 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -720,7 +720,7 @@ void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const const Index rows = this->rows(), cols = this->cols(), - nonzero_pivots = this->nonzeroPivots(); + nonzero_pivots = this->rank(); eigen_assert(rhs.rows() == rows); const Index smalldim = (std::min)(rows, cols); From 35c17a3fc8f61471590a4cf8376a6aa530e2e4bc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 22 Nov 2015 22:09:57 +0100 Subject: [PATCH 254/344] Use overload instead of template full specialization to please old MSVC --- Eigen/src/Core/MathFunctions.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index fe1dafe83..48cf565fb 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -748,13 +748,13 @@ template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) } //MSVC defines a _isnan builtin function, but for double only -template<> EIGEN_DEVICE_FUNC inline bool isnan_impl(const 
long double& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); } -template<> EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } -template<> EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); } #elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) From 8a2659f0cb2570c55d39036104860c656c9d3096 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 23 Nov 2015 10:53:55 +0100 Subject: [PATCH 255/344] Improve numerical robustness of some unit tests --- test/geo_transformations.cpp | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp index d50c7c76a..94ed155ef 100644 --- a/test/geo_transformations.cpp +++ b/test/geo_transformations.cpp @@ -12,6 +12,12 @@ #include #include +template +Matrix angleToVec(T a) +{ + return Matrix(std::cos(a), std::sin(a)); +} + template void non_projective_only() { /* this test covers the following files: @@ -130,14 +136,16 @@ template void transformations() AngleAxisx aa = AngleAxisx(q1); VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); - 
if(abs(aa.angle()) > NumTraits::dummy_precision()) + // The following test is stable only if 2*angle != angle and v1 is not colinear with axis + if( (abs(aa.angle()) > test_precision()) && (abs(aa.axis().dot(v1.normalized()))<(Scalar(1)-Scalar(4)*test_precision())) ) { VERIFY( !(q1 * v1).isApprox(Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1) ); } aa.fromRotationMatrix(aa.toRotationMatrix()); VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); - if(abs(aa.angle()) > NumTraits::dummy_precision()) + // The following test is stable only if 2*angle != angle and v1 is not colinear with axis + if( (abs(aa.angle()) > test_precision()) && (abs(aa.axis().dot(v1.normalized()))<(Scalar(1)-Scalar(4)*test_precision())) ) { VERIFY( !(q1 * v1).isApprox(Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1) ); } @@ -214,7 +222,9 @@ template void transformations() t4 *= aa3; VERIFY_IS_APPROX(t3.matrix(), t4.matrix()); - v3 = Vector3::Random(); + do { + v3 = Vector3::Random(); + } while (v3.cwiseAbs().minCoeff()::epsilon()); Translation3 tv3(v3); Transform3 t5(tv3); t4 = tv3; @@ -414,14 +424,12 @@ template void transformations() Scalar angle = internal::random(-100,100); Rotation2D rot2(angle); VERIFY( rot2.smallestPositiveAngle() >= 0 ); - VERIFY( rot2.smallestPositiveAngle() < Scalar(2)*Scalar(EIGEN_PI) ); - VERIFY_IS_APPROX( std::cos(rot2.smallestPositiveAngle()), std::cos(rot2.angle()) ); - VERIFY_IS_APPROX( std::sin(rot2.smallestPositiveAngle()), std::sin(rot2.angle()) ); + VERIFY( rot2.smallestPositiveAngle() <= Scalar(2)*Scalar(EIGEN_PI) ); + VERIFY_IS_APPROX( angleToVec(rot2.smallestPositiveAngle()), angleToVec(rot2.angle()) ); VERIFY( rot2.smallestAngle() >= -Scalar(EIGEN_PI) ); VERIFY( rot2.smallestAngle() <= Scalar(EIGEN_PI) ); - VERIFY_IS_APPROX( std::cos(rot2.smallestAngle()), std::cos(rot2.angle()) ); - VERIFY_IS_APPROX( std::sin(rot2.smallestAngle()), std::sin(rot2.angle()) ); + VERIFY_IS_APPROX( angleToVec(rot2.smallestAngle()), angleToVec(rot2.angle()) ); } s0 = 
internal::random(-100,100); @@ -437,7 +445,7 @@ template void transformations() VERIFY_IS_APPROX(t20,t21); VERIFY_IS_APPROX(s0, (R0.slerp(0, R1)).angle()); - VERIFY_IS_APPROX(R1.smallestPositiveAngle(), (R0.slerp(1, R1)).smallestPositiveAngle()); + VERIFY_IS_APPROX( angleToVec(R1.smallestPositiveAngle()), angleToVec((R0.slerp(1, R1)).smallestPositiveAngle()) ); VERIFY_IS_APPROX(R0.smallestPositiveAngle(), (R0.slerp(0.5, R0)).smallestPositiveAngle()); if(std::cos(s0)>0) @@ -447,13 +455,14 @@ template void transformations() // Check path length Scalar l = 0; - for(int k=0; k<100; ++k) + int path_steps = 100; + for(int k=0; k::epsilon()*Scalar(path_steps/2))); // check basic features { From 31b661e4cad656611c322e7b61ad7b5e83c62207 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 23 Nov 2015 13:28:43 +0100 Subject: [PATCH 256/344] Add a note on initParallel being optional in C++11. --- doc/TopicMultithreading.dox | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/TopicMultithreading.dox b/doc/TopicMultithreading.dox index 95f6bf287..47c9b261f 100644 --- a/doc/TopicMultithreading.dox +++ b/doc/TopicMultithreading.dox @@ -43,6 +43,8 @@ int main(int argc, char** argv) } \endcode +\note With Eigen 3.3, and a fully C++11 compliant compiler (i.e., thread-safe static local variable initialization), then calling \c initParallel() is optional. + \warning note that all functions generating random matrices are \b not re-entrant nor thread-safe. Those include DenseBase::Random(), and DenseBase::setRandom() despite a call to Eigen::initParallel(). This is because these functions are based on std::rand which is not re-entrant. For thread-safe random generator, we recommend the use of boost::random or c++11 random feature. In the case your application is parallelized with OpenMP, you might want to disable Eigen's own parallization as detailed in the previous section. 
From f3dca16a1d4226982281dafa7a2f51c36698f6e5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 23 Nov 2015 14:07:52 +0100 Subject: [PATCH 257/344] bug #1117: workaround unused-local-typedefs warning when EIGEN_NO_STATIC_ASSERT and NDEBUG are both defined. --- Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index 90f702ee3..d9420ac63 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -39,10 +39,9 @@ class CwiseBinaryOpImpl EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) CwiseBinaryOpImpl() { - typedef typename internal::traits::StorageKind LhsStorageKind; - typedef typename internal::traits::StorageKind RhsStorageKind; EIGEN_STATIC_ASSERT(( - (!internal::is_same::value) + (!internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value) || ((Lhs::Flags&RowMajorBit) == (Rhs::Flags&RowMajorBit))), THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH); } From f9fff67a56656db4ca0a633b16a0896ee0fcfa77 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 23 Nov 2015 15:03:24 +0100 Subject: [PATCH 258/344] Disable "decorated name length exceeded, name was truncated" MSVC warning. 
--- Eigen/src/Core/util/DisableStupidWarnings.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h index 6a0bf0629..46c141ad5 100644 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -10,6 +10,7 @@ // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data // 4273 - QtAlignedMalloc, inconsistent DLL linkage // 4324 - structure was padded due to declspec(align()) + // 4503 - decorated name length exceeded, name was truncated // 4512 - assignment operator could not be generated // 4522 - 'class' : multiple assignment operators specified // 4700 - uninitialized local variable 'xyz' used @@ -17,7 +18,7 @@ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4512 4522 4700 4717 ) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 ) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) // ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body From 1e0405901239eab520e1eb7c62dc8ac185a369b1 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 23 Nov 2015 08:36:54 -0800 Subject: [PATCH 259/344] Deleted unused variable. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h | 1 - 1 file changed, 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h index 6a81d3c71..90ee50678 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -1147,7 +1147,6 @@ EigenFloatContractionKernel(const LhsMapper lhs, const RhsMapper rhs, bool check_rhs = (base_n + 63) >= n_size; bool check_lhs128 = (base_m + 127) >= m_size; - bool check_lhs64 = (base_m + 63) >= m_size; if (!check_rhs) { if (!check_lhs128) { From df31ca3b9e038d9b83226a3ed3fe3c8a4cf16bdd Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 23 Nov 2015 10:03:53 -0800 Subject: [PATCH 260/344] Made it possible to refer t oa GPUDevice from code compile with a regular C++ compiler --- unsupported/Eigen/CXX11/Tensor | 6 +++--- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 17c4325b8..c681d3c20 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -8,8 +8,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-#ifndef EIGEN_CXX11_TENSOR_MODULE -#define EIGEN_CXX11_TENSOR_MODULE +//#ifndef EIGEN_CXX11_TENSOR_MODULE +//#define EIGEN_CXX11_TENSOR_MODULE #include "Core" @@ -117,4 +117,4 @@ #include -#endif // EIGEN_CXX11_TENSOR_MODULE +//#endif // EIGEN_CXX11_TENSOR_MODULE diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 7d80d0b91..c76d1ee3f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -27,7 +27,6 @@ class StreamInterface { virtual void deallocate(void* buffer) const = 0; }; -#if defined(__CUDACC__) static cudaDeviceProp* m_deviceProperties; static bool m_devicePropInitialized = false; @@ -107,7 +106,6 @@ class CudaStreamDevice : public StreamInterface { const cudaStream_t* stream_; int device_; }; -#endif // __CUDACC__ struct GpuDevice { // The StreamInterface is not owned: the caller is From 562078780a5511f33c6bb5639c5a93e56163a443 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 23 Nov 2015 11:00:10 -0800 Subject: [PATCH 261/344] Don't create more cuda blocks than necessary --- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 956672771..d93e1de1b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -219,9 +219,11 @@ inline void TensorExecutor::run(const Expression& const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); const int block_size = device.maxCudaThreadsPerBlock(); + const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / 
block_size; const Index size = array_prod(evaluator.dimensions()); + // Create a least one block to ensure we won't crash if we're called with tensors of size 0. + const int num_blocks = numext::maxi(numext::mini(max_blocks, (size + block_size - 1) / block_size), 1); LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); } evaluator.cleanup(); @@ -236,9 +238,11 @@ inline void TensorExecutor::run(const Expression& e const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const int num_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / device.maxCudaThreadsPerBlock(); const int block_size = device.maxCudaThreadsPerBlock(); + const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size; const Index size = array_prod(evaluator.dimensions()); + // Create a least one block to ensure we won't crash if we're called with tensors of size 0. + const int num_blocks = numext::maxi(numext::mini(max_blocks, (size + block_size - 1) / block_size), 1); LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); } evaluator.cleanup(); From 547a8608e5ff329c0f4e2da38c6eae023fc75647 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 23 Nov 2015 12:17:45 -0800 Subject: [PATCH 262/344] Fixed the implementation of Eigen::internal::count_leading_zeros for MSVC. Also updated the code to silence bogux warnings generated by nvcc when compilining this function. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index 81c661269..b58173e58 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -34,10 +34,7 @@ namespace { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val) { #ifdef __CUDA_ARCH__ - if (sizeof(T) == 8) { - return __clzll(val); - } - return __clz(val); + return (sizeof(T) == 8) ? __clzll(val) : __clz(val); #elif EIGEN_COMP_MSVC DWORD leading_zeros = 0; if (sizeof(T) == 8) { @@ -46,11 +43,11 @@ namespace { else { _BitScanReverse(&leading_zero, val); } + return leading_zeros; #else - if (sizeof(T) == 8) { - return __builtin_clzl(static_cast(val)); - } - return __builtin_clz(static_cast(val)); + return (sizeof(T) == 8) ? + __builtin_clzl(static_cast(val)) : + __builtin_clz(static_cast(val)); #endif } From 44848ac39bba2ba25514c6c897f5dc7bba1c76ae Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 23 Nov 2015 15:58:47 -0800 Subject: [PATCH 263/344] Fixed a bug in TensorArgMax.h --- .../Eigen/CXX11/src/Tensor/TensorArgMax.h | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h index d4f9a725d..c783aab97 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -215,10 +215,17 @@ struct TensorEvaluator, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_orig_impl(op.expression(), device), m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device), - m_return_dim(op.return_dim()), - m_stride_mod(gen_stride_mod(m_orig_impl.dimensions())), - 
m_stride_div(gen_stride_div()) { + m_return_dim(op.return_dim()) { + gen_strides(m_orig_impl.dimensions(), m_strides); + if (Layout == static_cast(ColMajor)) { + const Index total_size = internal::array_prod(m_orig_impl.dimensions()); + m_stride_mod = (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : total_size; + } else { + const Index total_size = internal::array_prod(m_orig_impl.dimensions()); + m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size; + } + m_stride_div = m_strides[m_return_dim]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { @@ -263,25 +270,13 @@ struct TensorEvaluator, Devi } } - EIGEN_DEVICE_FUNC Index gen_stride_mod(const InputDimensions& dims) { - if (Layout == static_cast(ColMajor)) { - return (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : dims.TotalSize(); - } else { - return (m_return_dim > 0) ? m_strides[m_return_dim - 1] : dims.TotalSize(); - } - } - - EIGEN_DEVICE_FUNC Index gen_stride_div() { - return m_strides[m_return_dim]; - } - protected: TensorEvaluator, Device> m_orig_impl; TensorEvaluator >, Device> m_impl; const int m_return_dim; StrideDims m_strides; - const Index m_stride_mod; - const Index m_stride_div; + Index m_stride_mod; + Index m_stride_div; }; } // end namespace Eigen From 7ddcf97da7683d7149bef880ab3f1967ccf2a7ab Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 24 Nov 2015 17:15:07 -0500 Subject: [PATCH 264/344] added scalar_sign_op (both real,complex) --- Eigen/src/Core/GenericPacketMath.h | 4 +- Eigen/src/Core/GlobalFunctions.h | 1 + Eigen/src/Core/functors/UnaryFunctors.h | 38 +++++++++++++++++++ Eigen/src/Core/util/ForwardDeclarations.h | 1 + Eigen/src/plugins/ArrayCwiseUnaryOps.h | 19 ++++++++++ Eigen/src/plugins/MatrixCwiseUnaryOps.h | 12 ++++++ .../Eigen/CXX11/src/Tensor/TensorBase.h | 6 +++ 7 files changed, 80 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h 
index c767757b4..5f27d8166 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -77,7 +77,9 @@ struct default_packet_traits HasRound = 0, HasFloor = 0, - HasCeil = 0 + HasCeil = 0, + + HasSign = 0 }; }; diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index aaa076701..585974809 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -64,6 +64,7 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op) template inline const Eigen::CwiseUnaryOp, const Derived> diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index c897046bd..e6c665fb6 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -655,6 +655,44 @@ struct functor_traits > { }; }; +/** \internal + * \brief Template functor to compute the signum of a scalar + * \sa class CwiseUnaryOp, Cwise::sign() + */ +template::IsComplex!=0) > struct scalar_sign_op; +template +struct scalar_sign_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const + { + return Scalar( (a>Scalar(0)) - (a + //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); } +}; +template +struct scalar_sign_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const + { + typename NumTraits::Real aa = std::abs(a); + return (aa==0) ? Scalar(0) : (a/aa); + } + //TODO + //template + //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); } +}; +template +struct functor_traits > +{ enum { + Cost = + NumTraits::IsComplex + ? 
( 8*NumTraits::MulCost ) // roughly + : ( 3*NumTraits::AddCost), + PacketAccess = packet_traits::HasSign + }; +}; } // end namespace internal diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 34697765d..1aa81abf8 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -208,6 +208,7 @@ template struct scalar_random_op; template struct scalar_add_op; template struct scalar_constant_op; template struct scalar_identity_op; +template struct scalar_sign_op; template struct scalar_product_op; template struct scalar_multiple2_op; diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 5a3c92ea2..60d56a252 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -4,6 +4,7 @@ typedef CwiseUnaryOp, const Derived> AbsReturnTy typedef CwiseUnaryOp, const Derived> ArgReturnType; typedef CwiseUnaryOp, const Derived> Abs2ReturnType; typedef CwiseUnaryOp, const Derived> SqrtReturnType; +typedef CwiseUnaryOp, const Derived> SignReturnType; typedef CwiseUnaryOp, const Derived> InverseReturnType; typedef CwiseUnaryOp, const Derived> BooleanNotReturnType; @@ -138,6 +139,24 @@ sqrt() const return SqrtReturnType(derived()); } +/** \returns an expression of the coefficient-wise signum of *this. + * + * This function computes the coefficient-wise signum. The function MatrixBase::sign() in the + * unsupported module MatrixFunctions computes the matrix square root. + * + * Example: \include Cwise_sign.cpp + * Output: \verbinclude Cwise_sign.out + * + * \sa pow(), square() + */ +EIGEN_DEVICE_FUNC +inline const SignReturnType +sign() const +{ + return SignReturnType(derived()); +} + + /** \returns an expression of the coefficient-wise cosine of *this. * * This function computes the coefficient-wise cosine. 
The function MatrixBase::cos() in the diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/Eigen/src/plugins/MatrixCwiseUnaryOps.h index e339140bf..e16bb374b 100644 --- a/Eigen/src/plugins/MatrixCwiseUnaryOps.h +++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.h @@ -14,6 +14,7 @@ typedef CwiseUnaryOp, const Derived> CwiseAbsReturnType; typedef CwiseUnaryOp, const Derived> CwiseAbs2ReturnType; typedef CwiseUnaryOp, const Derived> CwiseSqrtReturnType; +typedef CwiseUnaryOp, const Derived> CwiseSignReturnType; typedef CwiseUnaryOp, const Derived> CwiseInverseReturnType; /** \returns an expression of the coefficient-wise absolute value of \c *this @@ -49,6 +50,17 @@ EIGEN_DEVICE_FUNC inline const CwiseSqrtReturnType cwiseSqrt() const { return CwiseSqrtReturnType(derived()); } +/** \returns an expression of the coefficient-wise signum of *this. + * + * Example: \include MatrixBase_cwiseSign.cpp + * Output: \verbinclude MatrixBase_cwiseSign.out + * + */ +EIGEN_DEVICE_FUNC +inline const CwiseSignReturnType +cwiseSign() const { return CwiseSignReturnType(derived()); } + + /** \returns an expression of the coefficient-wise inverse of *this. 
* * Example: \include MatrixBase_cwiseInverse.cpp diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index e7b09e562..5ec1deaf8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -86,6 +86,12 @@ class TensorBase return unaryExpr(internal::scalar_sqrt_op()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sign() const { + return unaryExpr(internal::scalar_sign_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> rsqrt() const { From 91a70594595eec1fe9baeae65dca8189f1210d1a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Nov 2015 10:06:07 +0100 Subject: [PATCH 265/344] bug #1009, part 1/2: make sure vector expressions expose LinearAccessBit flag. --- Eigen/src/Core/CoreEvaluators.h | 14 ++++++++------ Eigen/src/Core/ProductEvaluators.h | 11 ++++++++++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index fb0cdc99c..a8b359085 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -907,8 +907,8 @@ struct unary_evaluator > enum { CoeffReadCost = evaluator::CoeffReadCost, - - Flags = (evaluator::Flags & HereditaryBits & ~RowMajorBit) | (traits::Flags & RowMajorBit), + LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0, + Flags = (evaluator::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits::Flags & RowMajorBit), Alignment = evaluator::Alignment }; @@ -1149,6 +1149,7 @@ struct unary_evaluator > // FIXME enable DirectAccess with negative strides? Flags0 = evaluator::Flags, LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1)) ? 
LinearAccessBit : 0, Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess), @@ -1158,8 +1159,8 @@ struct unary_evaluator > EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) : m_argImpl(reverse.nestedExpression()), - m_rows(ReverseRow ? reverse.nestedExpression().rows() : 0), - m_cols(ReverseCol ? reverse.nestedExpression().cols() : 0) + m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1), + m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) { } EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const @@ -1233,8 +1234,9 @@ protected: evaluator m_argImpl; // If we do not reverse rows, then we do not need to know the number of rows; same for columns - const variable_if_dynamic m_rows; - const variable_if_dynamic m_cols; + // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors. + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; }; diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 2927fcc0e..bd1e1c85d 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -484,7 +484,8 @@ struct product_evaluator, ProductTag, DenseShape, Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) | (EvalToRowMajor ? RowMajorBit : 0) // TODO enable vectorization for mixed types - | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) + | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), Alignment = CanVectorizeLhs ? LhsAlignment : CanVectorizeRhs ? RhsAlignment @@ -531,6 +532,14 @@ struct product_evaluator, ProductTag, DenseShape, return res; } + template + const PacketType packet(Index index) const + { + const Index row = RowsAtCompileTime == 1 ? 0 : index; + const Index col = RowsAtCompileTime == 1 ? 
index : 0; + return packet(row,col); + } + protected: const LhsNested m_lhs; const RhsNested m_rhs; From ca001d7c2a674da2da1a50f9aa122dafedb5123f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Nov 2015 10:06:47 +0100 Subject: [PATCH 266/344] Big 1009, part 2/2: add static assertion on LinearAccessBit in coeff(index)-like methods. --- Eigen/src/Core/DenseCoeffsBase.h | 6 ++++++ Eigen/src/Core/util/StaticAssert.h | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index 339c0986b..820a90e6f 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -138,6 +138,8 @@ class DenseCoeffsBase : public EigenBase EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); return internal::evaluator(derived()).coeff(index); } @@ -243,6 +245,8 @@ class DenseCoeffsBase : public EigenBase template EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) typedef typename internal::packet_traits::type DefaultPacketType; eigen_internal_assert(index >= 0 && index < size()); return internal::evaluator(derived()).template packet(index); @@ -370,6 +374,8 @@ class DenseCoeffsBase : public DenseCoeffsBase::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); return internal::evaluator(derived()).coeffRef(index); } diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 9d7302d81..f35ddb372 100644 --- 
a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -94,7 +94,8 @@ OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, STORAGE_LAYOUT_DOES_NOT_MATCH, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS }; }; From 0ff127e89675b3a4bf598fbb1317a3b85914916b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Nov 2015 10:18:39 +0100 Subject: [PATCH 267/344] Preserve CMAKE_CXX_FLAGS in BTL --- bench/btl/CMakeLists.txt | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/bench/btl/CMakeLists.txt b/bench/btl/CMakeLists.txt index 9444b450c..38ff9f483 100644 --- a/bench/btl/CMakeLists.txt +++ b/bench/btl/CMakeLists.txt @@ -11,29 +11,24 @@ SET(CMAKE_INCLUDE_CURRENT_DIR ON) string(REGEX MATCH icpc IS_ICPC ${CMAKE_CXX_COMPILER}) IF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC) - SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG") - SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG") - IF(NOT BTL_NOVEC) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2") - SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -msse2") - ELSE(NOT BTL_NOVEC) + SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_CXX_FLAGS}") + SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_Fortran_FLAGS}") + IF(BTL_NOVEC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE") - ENDIF(NOT BTL_NOVEC) + ENDIF(BTL_NOVEC) ENDIF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC) IF(MSVC) SET(CMAKE_CXX_FLAGS " /O2 /Ot /GL /fp:fast -DNDEBUG") # SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG") - IF(NOT BTL_NOVEC) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2") - ELSE(NOT BTL_NOVEC) + IF(BTL_NOVEC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE") - ENDIF(NOT BTL_NOVEC) + ENDIF(BTL_NOVEC) ENDIF(MSVC) if(IS_ICPC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fast") 
- set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fast") + set(CMAKE_CXX_FLAGS "-fast ${CMAKE_CXX_FLAGS}") + set(CMAKE_Fortran_FLAGS "-fast ${CMAKE_Fortran_FLAGS}") endif(IS_ICPC) include_directories( From 1261d020c31d64ad835c532251d9eed9a55d7c92 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Nov 2015 10:39:09 +0100 Subject: [PATCH 268/344] bug #1120, superlu: mem_usage_t is now uniquely defined, so let's use it. --- Eigen/src/SuperLUSupport/SuperLUSupport.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index afb5904e0..c145e25bd 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -14,12 +14,11 @@ namespace Eigen { #define DECL_GSSVX(PREFIX,FLOATTYPE,KEYTYPE) \ extern "C" { \ - typedef struct { FLOATTYPE for_lu; FLOATTYPE total_needed; int expansions; } PREFIX##mem_usage_t; \ extern void PREFIX##gssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, \ char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *, \ void *, int, SuperMatrix *, SuperMatrix *, \ FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, \ - PREFIX##mem_usage_t *, SuperLUStat_t *, int *); \ + mem_usage_t *, SuperLUStat_t *, int *); \ } \ inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A, \ int *perm_c, int *perm_r, int *etree, char *equed, \ @@ -29,7 +28,7 @@ namespace Eigen { FLOATTYPE *recip_pivot_growth, \ FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr, \ SuperLUStat_t *stats, int *info, KEYTYPE) { \ - PREFIX##mem_usage_t mem_usage; \ + mem_usage_t mem_usage; \ PREFIX##gssvx(options, A, perm_c, perm_r, etree, equed, R, C, L, \ U, work, lwork, B, X, recip_pivot_growth, rcond, \ ferr, berr, &mem_usage, stats, info); \ @@ -53,7 +52,7 @@ DECL_GSSVX(z,double,std::complex) extern void PREFIX##gsisx(superlu_options_t *, SuperMatrix *, int *, int *, int *, \ char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *, \ void *, int, SuperMatrix *, SuperMatrix *, FLOATTYPE *, FLOATTYPE *, \ - PREFIX##mem_usage_t *, SuperLUStat_t *, int *); \ + mem_usage_t *, SuperLUStat_t *, int *); \ } \ inline float SuperLU_gsisx(superlu_options_t *options, SuperMatrix *A, \ int *perm_c, int *perm_r, int *etree, char *equed, \ @@ -63,7 +62,7 @@ DECL_GSSVX(z,double,std::complex) FLOATTYPE *recip_pivot_growth, \ FLOATTYPE *rcond, \ SuperLUStat_t *stats, int *info, KEYTYPE) { \ - PREFIX##mem_usage_t mem_usage; \ + mem_usage_t mem_usage; \ PREFIX##gsisx(options, A, perm_c, perm_r, etree, equed, R, C, L, \ U, work, lwork, B, X, recip_pivot_growth, rcond, \ &mem_usage, stats, info); \ From 
da46b1ed5412a7bf727f489ca4f782abcd4743e8 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Nov 2015 15:57:18 +0100 Subject: [PATCH 269/344] bug #1112: fix compilation on exotic architectures --- unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h index b37481cbe..14a8aef58 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h @@ -348,7 +348,7 @@ void matrix_exp_compute(const MatrixType& arg, ResultType &result) typedef typename NumTraits::Real RealScalar; typedef typename std::complex ComplexScalar; if (sizeof(RealScalar) > 14) { - result = arg.matrixFunction(StdStemFunctions::exp); + result = arg.matrixFunction(internal::stem_function_exp); return; } #endif From 3f32f5ec221c443fa354aaa1ea6d5628bb971474 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Nov 2015 16:27:53 +0100 Subject: [PATCH 270/344] ArrayBase::sign: add unit test and fix doc --- Eigen/src/plugins/ArrayCwiseUnaryOps.h | 3 +-- test/array.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 60d56a252..a9310f12d 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -141,8 +141,7 @@ sqrt() const /** \returns an expression of the coefficient-wise signum of *this. * - * This function computes the coefficient-wise signum. The function MatrixBase::sign() in the - * unsupported module MatrixFunctions computes the matrix square root. + * This function computes the coefficient-wise signum. 
* * Example: \include Cwise_sign.cpp * Output: \verbinclude Cwise_sign.out diff --git a/test/array.cpp b/test/array.cpp index d437105da..367bda2c4 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -230,6 +230,7 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.square(), square(m1)); VERIFY_IS_APPROX(m1.cube(), cube(m1)); VERIFY_IS_APPROX(cos(m1+RealScalar(3)*m2), cos((m1+RealScalar(3)*m2).eval())); + VERIFY_IS_APPROX(m1.sign(), sign(m1)); // avoid NaNs with abs() so verification doesn't fail @@ -255,6 +256,9 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(inverse(inverse(m1)),m1); VERIFY((abs(m1) == m1 || abs(m1) == -m1).all()); VERIFY_IS_APPROX(m3, sqrt(abs2(m1))); + VERIFY_IS_APPROX( m1.sign(), -(-m1).sign() ); + VERIFY_IS_APPROX( m1*m1.sign(),m1.abs()); + VERIFY_IS_APPROX(m1.sign() * m1.abs(), m1); VERIFY_IS_APPROX(numext::abs2(numext::real(m1)) + numext::abs2(numext::imag(m1)), numext::abs2(m1)); VERIFY_IS_APPROX(numext::abs2(real(m1)) + numext::abs2(imag(m1)), numext::abs2(m1)); @@ -348,6 +352,7 @@ template void array_complex(const ArrayType& m) VERIFY_IS_APPROX(m1.square(), square(m1)); VERIFY_IS_APPROX(m1.cube(), cube(m1)); VERIFY_IS_APPROX(cos(m1+RealScalar(3)*m2), cos((m1+RealScalar(3)*m2).eval())); + VERIFY_IS_APPROX(m1.sign(), sign(m1)); VERIFY_IS_APPROX(m1.exp() * m2.exp(), exp(m1+m2)); @@ -391,6 +396,9 @@ template void array_complex(const ArrayType& m) VERIFY_IS_APPROX(abs(m1), sqrt(abs2(m1))); VERIFY_IS_APPROX(log10(m1), log(m1)/log(10)); + VERIFY_IS_APPROX( m1.sign(), -(-m1).sign() ); + VERIFY_IS_APPROX( m1.sign() * m1.abs(), m1); + // scalar by array division const RealScalar tiny = sqrt(std::numeric_limits::epsilon()); s1 += Scalar(tiny); From 6bdeb8cfbe5d7da0380237fa5622a29223a261f3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Nov 2015 21:58:36 +0100 Subject: [PATCH 271/344] bug #918, umfpack: add access to umfpack return code and parameters --- Eigen/src/UmfPackSupport/UmfPackSupport.h | 57 
++++++++++++++++++++--- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h index 0a5043ef2..38db3bae4 100644 --- a/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -16,6 +16,13 @@ namespace Eigen { // generic double/complex wrapper functions: + +inline void umfpack_defaults(double control[UMFPACK_CONTROL], double) +{ umfpack_di_defaults(control); } + +inline void umfpack_defaults(double control[UMFPACK_CONTROL], std::complex) +{ umfpack_zi_defaults(control); } + inline void umfpack_free_numeric(void **Numeric, double) { umfpack_di_free_numeric(Numeric); *Numeric = 0; } @@ -142,6 +149,8 @@ class UmfPackLU : public SparseSolverBase > public: + typedef Array UmfpackControl; + UmfPackLU() : m_dummy(0,0), mp_matrix(m_dummy) { @@ -230,6 +239,39 @@ class UmfPackLU : public SparseSolverBase > analyzePattern_impl(); } + /** Provides the return status code returned by UmfPack during the numeric + * factorization. + * + * \sa factorize(), compute() + */ + inline int umfpackFactorizeReturncode() const + { + eigen_assert(m_numeric && "UmfPackLU: you must first call factorize()"); + return m_fact_errorCode; + } + + /** Provides access to the control settings array used by UmfPack. + * + * If this array contains NaN's, the default values are used. + * + * See UMFPACK documentation for details. + */ + inline const UmfpackControl& umfpackControl() const + { + return m_control; + } + + /** Provides access to the control settings array used by UmfPack. + * + * If this array contains NaN's, the default values are used. + * + * See UMFPACK documentation for details. + */ + inline UmfpackControl& umfpackControl() + { + return m_control; + } + /** Performs a numeric decomposition of \a matrix * * The given matrix must has the same sparcity than the matrix on which the pattern anylysis has been performed. 
@@ -269,11 +311,12 @@ class UmfPackLU : public SparseSolverBase > void analyzePattern_impl() { + umfpack_defaults(m_control.data(), Scalar()); int errorCode = 0; errorCode = umfpack_symbolic(internal::convert_index(mp_matrix.rows()), internal::convert_index(mp_matrix.cols()), mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - &m_symbolic, 0, 0); + &m_symbolic, m_control.data(), 0); m_isInitialized = true; m_info = errorCode ? InvalidInput : Success; @@ -284,11 +327,10 @@ class UmfPackLU : public SparseSolverBase > void factorize_impl() { - int errorCode; - errorCode = umfpack_numeric(mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - m_symbolic, &m_numeric, 0, 0); + m_fact_errorCode = umfpack_numeric(mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), + m_symbolic, &m_numeric, m_control.data(), 0); - m_info = errorCode ? NumericalIssue : Success; + m_info = m_fact_errorCode == UMFPACK_OK ? Success : NumericalIssue; m_factorizationIsOk = true; m_extractedDataAreDirty = true; } @@ -311,6 +353,9 @@ class UmfPackLU : public SparseSolverBase > // cached data to reduce reallocation, etc. 
mutable LUMatrixType m_l; + int m_fact_errorCode; + UmfpackControl m_control; + mutable LUMatrixType m_u; mutable IntColVectorType m_p; mutable IntRowVectorType m_q; @@ -390,7 +435,7 @@ bool UmfPackLU::_solve_impl(const MatrixBase &b, MatrixBas x_ptr = &x.col(j).coeffRef(0); errorCode = umfpack_solve(UMFPACK_A, mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, 0, 0); + x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, m_control.data(), 0); if(x.innerStride()!=1) x.col(j) = x_tmp; if (errorCode!=0) From afa11d646d0825d6e8ada3fe6676bc66746b5c99 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 27 Nov 2015 22:04:22 +0100 Subject: [PATCH 272/344] Fix UmfPackLU ctor for exppressions --- Eigen/src/UmfPackSupport/UmfPackSupport.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h index 38db3bae4..caac082f3 100644 --- a/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -157,7 +157,8 @@ class UmfPackLU : public SparseSolverBase > init(); } - explicit UmfPackLU(const MatrixType& matrix) + template + explicit UmfPackLU(const InputMatrixType& matrix) : mp_matrix(matrix) { init(); From 6fcd316f2366a38ddfda0aabcf536b1d475e51b7 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 30 Nov 2015 14:48:11 +0100 Subject: [PATCH 273/344] Extend superlu cmake script to check version --- bench/spbench/CMakeLists.txt | 2 +- cmake/FindSuperLU.cmake | 52 +++++++++++++++++++++++++++++++++--- test/CMakeLists.txt | 2 +- 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 6e0e1b103..8d53f4ae2 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -29,7 +29,7 @@ if(UMFPACK_FOUND AND BLAS_FOUND) set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} 
${BLAS_LIBRARIES}) endif() -find_package(SuperLU) +find_package(SuperLU 4.0) if(SUPERLU_FOUND AND BLAS_FOUND) add_definitions("-DEIGEN_SUPERLU_SUPPORT") include_directories(${SUPERLU_INCLUDES}) diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake index 8a3df3666..259ed7320 100644 --- a/cmake/FindSuperLU.cmake +++ b/cmake/FindSuperLU.cmake @@ -17,10 +17,54 @@ find_path(SUPERLU_INCLUDES SRC ) -find_library(SUPERLU_LIBRARIES superlu PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib) - +find_library(SUPERLU_LIBRARIES NAMES "superlu_4.3" "superlu_4.2" "superlu_4.1" "superlu_4.0" "superlu_3.1" "superlu_3.0" "superlu" PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib) + +if(SUPERLU_INCLUDES AND SUPERLU_LIBRARIES) + +include(CheckCXXSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state() + +set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${SUPERLU_INCLUDES}) + +# check whether struct mem_usage_t is globally defined +check_cxx_source_compiles(" +typedef int int_t; +#include +#include +int main() { + mem_usage_t mem; + return 0; +}" +SUPERLU_HAS_GLOBAL_MEM_USAGE_T) + + +check_cxx_source_compiles(" +typedef int int_t; +#include +#include +int main() { + return SLU_SINGLE; +}" +SUPERLU_HAS_CLEAN_ENUMS) + +if(SUPERLU_HAS_CLEAN_ENUMS) + # at least 4.3 + set(SUPERLU_VERSION_VAR "4.3") +elseif(SUPERLU_HAS_GLOBAL_MEM_USAGE_T) + # at least 4.3 + set(SUPERLU_VERSION_VAR "4.0") +else() + set(SUPERLU_VERSION_VAR "3.0") +endif() + +cmake_pop_check_state() + +endif() + include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(SUPERLU DEFAULT_MSG - SUPERLU_INCLUDES SUPERLU_LIBRARIES) +find_package_handle_standard_args(SUPERLU + REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES + VERSION_VAR SUPERLU_VERSION_VAR) mark_as_advanced(SUPERLU_INCLUDES SUPERLU_LIBRARIES) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 822ca8f10..bbebf29cd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -68,7 +68,7 @@ 
else() ei_add_property(EIGEN_MISSING_BACKENDS "UmfPack, ") endif() -find_package(SuperLU) +find_package(SuperLU 4.0) if(SUPERLU_FOUND) add_definitions("-DEIGEN_SUPERLU_SUPPORT") include_directories(${SUPERLU_INCLUDES}) From fd727249ada26896881d2f6905883f6ff9bbafe0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 30 Nov 2015 16:00:22 +0100 Subject: [PATCH 274/344] Update ADOL-C support. --- unsupported/Eigen/AdolcForward | 2 +- unsupported/test/forward_adolc.cpp | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/AdolcForward b/unsupported/Eigen/AdolcForward index 2627decd0..15f5f0731 100644 --- a/unsupported/Eigen/AdolcForward +++ b/unsupported/Eigen/AdolcForward @@ -25,7 +25,7 @@ #ifndef NUMBER_DIRECTIONS # define NUMBER_DIRECTIONS 2 #endif -#include +#include // adolc defines some very stupid macros: #if defined(malloc) diff --git a/unsupported/test/forward_adolc.cpp b/unsupported/test/forward_adolc.cpp index d4baafe62..866db8e86 100644 --- a/unsupported/test/forward_adolc.cpp +++ b/unsupported/test/forward_adolc.cpp @@ -13,8 +13,6 @@ #define NUMBER_DIRECTIONS 16 #include -int adtl::ADOLC_numDir; - template EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p) { @@ -123,7 +121,7 @@ template void adolc_forward_jacobian(const Func& f) void test_forward_adolc() { - adtl::ADOLC_numDir = NUMBER_DIRECTIONS; + adtl::setNumDir(NUMBER_DIRECTIONS); for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST(( adolc_forward_jacobian(TestFunc1()) )); From 034ca5a22dd8f83c7d9b59428c0b8a41b4cfef88 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 30 Nov 2015 17:05:42 +0100 Subject: [PATCH 275/344] Clean hardcoded compilation options --- CMakeLists.txt | 6 +----- cmake/EigenConfigureTesting.cmake | 9 +-------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 401400a21..aebc6d45d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,11 +118,7 @@ 
endmacro(ei_add_cxx_compiler_flag) if(NOT MSVC) # We assume that other compilers are partly compatible with GNUCC - -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions") - set(CMAKE_CXX_FLAGS_DEBUG "-g3") - set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2") - + # clang outputs some warnings for unknwon flags that are not caught by check_cxx_compiler_flag # adding -Werror turns such warnings into errors check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR) diff --git a/cmake/EigenConfigureTesting.cmake b/cmake/EigenConfigureTesting.cmake index 0ee484e8c..afc24b5e9 100644 --- a/cmake/EigenConfigureTesting.cmake +++ b/cmake/EigenConfigureTesting.cmake @@ -46,16 +46,9 @@ if(CMAKE_COMPILER_IS_GNUCXX) if(EIGEN_COVERAGE_TESTING) set(COVERAGE_FLAGS "-fprofile-arcs -ftest-coverage") set(CTEST_CUSTOM_COVERAGE_EXCLUDE "/test/") - else(EIGEN_COVERAGE_TESTING) - set(COVERAGE_FLAGS "") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS}") endif(EIGEN_COVERAGE_TESTING) - if(CMAKE_SYSTEM_NAME MATCHES Linux) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS} -g2") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${COVERAGE_FLAGS} -O2 -g2") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${COVERAGE_FLAGS} -fno-inline-functions") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COVERAGE_FLAGS} -O0 -g3") - endif(CMAKE_SYSTEM_NAME MATCHES Linux) elseif(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS") endif(CMAKE_COMPILER_IS_GNUCXX) From e7a1c48185be8f964d288ee045e6ce6cebf1ae95 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 30 Nov 2015 22:19:20 +0100 Subject: [PATCH 276/344] Update BLAS API unit tests --- blas/testing/cblat1.f | 83 ++++++++++++++----- blas/testing/cblat2.f | 188 ++++++++++++++++++++++++----------------- blas/testing/cblat3.f | 185 ++++++++++++++++++++++++++--------------- blas/testing/dblat2.f | 186 ++++++++++++++++++++++++----------------- 
blas/testing/dblat3.f | 168 ++++++++++++++++++++++++------------- blas/testing/sblat2.f | 186 ++++++++++++++++++++++++----------------- blas/testing/sblat3.f | 168 ++++++++++++++++++++++++------------- blas/testing/zblat1.f | 83 ++++++++++++++----- blas/testing/zblat2.f | 188 ++++++++++++++++++++++++----------------- blas/testing/zblat3.f | 189 +++++++++++++++++++++++++++--------------- 10 files changed, 1036 insertions(+), 588 deletions(-) diff --git a/blas/testing/cblat1.f b/blas/testing/cblat1.f index a4c996fda..8ca67fb19 100644 --- a/blas/testing/cblat1.f +++ b/blas/testing/cblat1.f @@ -1,7 +1,49 @@ +*> \brief \b CBLAT1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CBLAT1 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX Level 1 BLAS. +*> Based upon the original BLAS test routine together with: +*> +*> F06GAF Example Program Text +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex_blas_testing +* +* ===================================================================== PROGRAM CBLAT1 -* Test program for the COMPLEX Level 1 BLAS. -* Based upon the original BLAS test routine together with: -* F06GAF Example Program Text +* +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* ===================================================================== +* * .. Parameters .. 
INTEGER NOUT PARAMETER (NOUT=6) @@ -114,8 +156,8 @@ + (5.0E0,6.0E0), (5.0E0,6.0E0), (0.1E0,0.1E0), + (-0.6E0,0.1E0), (0.1E0,-0.3E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (7.0E0,8.0E0), (0.3E0,0.1E0), (0.1E0,0.4E0), - + (0.4E0,0.1E0), (0.1E0,0.2E0), (2.0E0,3.0E0), + + (7.0E0,8.0E0), (0.3E0,0.1E0), (0.5E0,0.0E0), + + (0.0E0,0.5E0), (0.0E0,0.2E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), @@ -129,10 +171,10 @@ + (3.0E0,6.0E0), (-0.6E0,0.1E0), (4.0E0,7.0E0), + (0.1E0,-0.3E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.3E0,0.1E0), (5.0E0,8.0E0), - + (0.1E0,0.4E0), (6.0E0,9.0E0), (0.4E0,0.1E0), - + (8.0E0,3.0E0), (0.1E0,0.2E0), (9.0E0,4.0E0)/ - DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.7E0/ - DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.7E0/ + + (0.5E0,0.0E0), (6.0E0,9.0E0), (0.0E0,0.5E0), + + (8.0E0,3.0E0), (0.0E0,0.2E0), (9.0E0,4.0E0)/ + DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.8E0/ + DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.6E0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), @@ -145,8 +187,8 @@ + (0.11E0,-0.03E0), (-0.17E0,0.46E0), + (-0.17E0,-0.19E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (0.19E0,-0.17E0), (0.32E0,0.09E0), - + (0.23E0,-0.24E0), (0.18E0,0.01E0), + + (0.19E0,-0.17E0), (0.20E0,-0.35E0), + + (0.35E0,0.20E0), (0.14E0,0.08E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0), + (2.0E0,3.0E0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), @@ -162,9 +204,9 @@ + (-0.17E0,0.46E0), (4.0E0,7.0E0), + (-0.17E0,-0.19E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.19E0,-0.17E0), (5.0E0,8.0E0), - + (0.32E0,0.09E0), (6.0E0,9.0E0), - + (0.23E0,-0.24E0), (8.0E0,3.0E0), - + (0.18E0,0.01E0), (9.0E0,4.0E0)/ + + (0.20E0,-0.35E0), (6.0E0,9.0E0), + + (0.35E0,0.20E0), (8.0E0,3.0E0), + + 
(0.14E0,0.08E0), (9.0E0,4.0E0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), @@ -177,8 +219,8 @@ + (0.03E0,0.03E0), (-0.18E0,0.03E0), + (0.03E0,-0.09E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (0.09E0,0.03E0), (0.03E0,0.12E0), - + (0.12E0,0.03E0), (0.03E0,0.06E0), (2.0E0,3.0E0), + + (0.09E0,0.03E0), (0.15E0,0.00E0), + + (0.00E0,0.15E0), (0.00E0,0.06E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), @@ -193,8 +235,8 @@ + (-0.18E0,0.03E0), (4.0E0,7.0E0), + (0.03E0,-0.09E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.09E0,0.03E0), (5.0E0,8.0E0), - + (0.03E0,0.12E0), (6.0E0,9.0E0), (0.12E0,0.03E0), - + (8.0E0,3.0E0), (0.03E0,0.06E0), (9.0E0,4.0E0)/ + + (0.15E0,0.00E0), (6.0E0,9.0E0), (0.00E0,0.15E0), + + (8.0E0,3.0E0), (0.00E0,0.06E0), (9.0E0,4.0E0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. DO 60 INCX = 1, 2 @@ -529,7 +571,8 @@ * * .. Parameters .. INTEGER NOUT - PARAMETER (NOUT=6) + REAL ZERO + PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. REAL SFAC INTEGER LEN @@ -552,7 +595,7 @@ * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) - IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0E0) + IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). diff --git a/blas/testing/cblat2.f b/blas/testing/cblat2.f index 20f188100..5833ea81a 100644 --- a/blas/testing/cblat2.f +++ b/blas/testing/cblat2.f @@ -1,68 +1,114 @@ +*> \brief \b CBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX Level 2 Blas. 
+*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 17 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 35 lines: +*> 'cblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> CGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CHEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CHBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CHPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> CGERC T PUT F FOR NO TEST. SAME COLUMNS. +*> CGERU T PUT F FOR NO TEST. SAME COLUMNS. +*> CHER T PUT F FOR NO TEST. SAME COLUMNS. +*> CHPR T PUT F FOR NO TEST. SAME COLUMNS. +*> CHER2 T PUT F FOR NO TEST. SAME COLUMNS. +*> CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. 
+*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex_blas_testing +* +* ===================================================================== PROGRAM CBLAT2 * -* Test program for the COMPLEX Level 2 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 17 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 35 lines: -* 'CBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. 
-* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* CGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* CGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* CHEMV T PUT F FOR NO TEST. SAME COLUMNS. -* CHBMV T PUT F FOR NO TEST. SAME COLUMNS. -* CHPMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTRMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTBMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTPMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTRSV T PUT F FOR NO TEST. SAME COLUMNS. -* CTBSV T PUT F FOR NO TEST. SAME COLUMNS. -* CTPSV T PUT F FOR NO TEST. SAME COLUMNS. -* CGERC T PUT F FOR NO TEST. SAME COLUMNS. -* CGERU T PUT F FOR NO TEST. SAME COLUMNS. -* CHER T PUT F FOR NO TEST. SAME COLUMNS. -* CHPR T PUT F FOR NO TEST. SAME COLUMNS. -* CHER2 T PUT F FOR NO TEST. SAME COLUMNS. -* CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. -* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. -* -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. 
INTEGER NIN @@ -71,8 +117,8 @@ PARAMETER ( NSUBS = 17 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) - REAL RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0, RHALF = 0.5, RONE = 1.0 ) + REAL RZERO + PARAMETER ( RZERO = 0.0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -126,7 +172,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -135,7 +181,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -240,14 +286,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 90 CONTINUE - IF( SDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 100 - EPS = RHALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of CMVCH using exact data. @@ -3079,7 +3118,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LCERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/cblat3.f b/blas/testing/cblat3.f index b26be91e6..09f2cb9c5 100644 --- a/blas/testing/cblat3.f +++ b/blas/testing/cblat3.f @@ -1,50 +1,96 @@ +*> \brief \b CBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 9 records +*> are read using the format ( A6, L2 ). 
An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 23 lines: +*> 'cblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> CGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CHEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> CHERK T PUT F FOR NO TEST. SAME COLUMNS. +*> CSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> CHER2K T PUT F FOR NO TEST. SAME COLUMNS. +*> CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. 
of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex_blas_testing +* +* ===================================================================== PROGRAM CBLAT3 * -* Test program for the COMPLEX Level 3 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 9 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 23 lines: -* 'CBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* CGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* CHEMM T PUT F FOR NO TEST. SAME COLUMNS. -* CSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* CTRMM T PUT F FOR NO TEST. SAME COLUMNS. -* CTRSM T PUT F FOR NO TEST. SAME COLUMNS. -* CHERK T PUT F FOR NO TEST. SAME COLUMNS. -* CSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* CHER2K T PUT F FOR NO TEST. SAME COLUMNS. -* CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. 
-* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. -* -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. INTEGER NIN @@ -53,8 +99,8 @@ PARAMETER ( NSUBS = 9 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) - REAL RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0, RHALF = 0.5, RONE = 1.0 ) + REAL RZERO + PARAMETER ( RZERO = 0.0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -103,7 +149,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -112,7 +158,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -189,14 +235,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 70 CONTINUE - IF( SDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 80 - EPS = RHALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of CMMCH using exact data. @@ -1946,7 +1985,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, RALPHA, BETA, RBETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1956,12 +1995,19 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. 
* Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) +* 3-19-92: Fix argument 12 in calls to CSYMM and CHEMM +* with INFOT = 9 (eca) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) * .. Local Scalars .. COMPLEX ALPHA, BETA REAL RALPHA, RBETA @@ -1979,6 +2025,14 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA, BETA, RALPHA, and RBETA. +* + ALPHA = CMPLX( ONE, -ONE ) + BETA = CMPLX( TWO, -TWO ) + RALPHA = ONE + RBETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90 )ISNUM 10 INFOT = 1 @@ -2205,16 +2259,16 @@ CALL CHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2272,16 +2326,16 @@ CALL CSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, 
BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -3268,7 +3322,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LCERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/dblat2.f b/blas/testing/dblat2.f index 4002d4368..0fa80afa4 100644 --- a/blas/testing/dblat2.f +++ b/blas/testing/dblat2.f @@ -1,75 +1,121 @@ +*> \brief \b DBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM DBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the DOUBLE PRECISION Level 2 Blas. +*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 16 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 34 lines: +*> 'dblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. 
+*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 0.9 VALUES OF BETAC +*> DGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DSBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DSPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> DGER T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYR T PUT F FOR NO TEST. SAME COLUMNS. +*> DSPR T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. 
+*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup double_blas_testing +* +* ===================================================================== PROGRAM DBLAT2 * -* Test program for the DOUBLE PRECISION Level 2 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 16 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 34 lines: -* 'DBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 0.9 VALUES OF BETA -* DGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* DGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* DSYMV T PUT F FOR NO TEST. SAME COLUMNS. -* DSBMV T PUT F FOR NO TEST. SAME COLUMNS. -* DSPMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTRMV T PUT F FOR NO TEST. SAME COLUMNS. 
-* DTBMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTPMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTRSV T PUT F FOR NO TEST. SAME COLUMNS. -* DTBSV T PUT F FOR NO TEST. SAME COLUMNS. -* DTPSV T PUT F FOR NO TEST. SAME COLUMNS. -* DGER T PUT F FOR NO TEST. SAME COLUMNS. -* DSYR T PUT F FOR NO TEST. SAME COLUMNS. -* DSPR T PUT F FOR NO TEST. SAME COLUMNS. -* DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. -* DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. -* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. -* -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 16 ) - DOUBLE PRECISION ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -121,7 +167,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. 
@@ -130,7 +176,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -235,14 +281,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 90 CONTINUE - IF( DDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 100 - EPS = HALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of DMVCH using exact data. @@ -2982,7 +3021,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LDERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/dblat3.f b/blas/testing/dblat3.f index 082e03e5e..8d37c7453 100644 --- a/blas/testing/dblat3.f +++ b/blas/testing/dblat3.f @@ -1,55 +1,101 @@ +*> \brief \b DBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM DBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the DOUBLE PRECISION Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 6 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 20 lines: +*> 'dblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. 
+*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 1.3 VALUES OF BETA +*> DGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup double_blas_testing +* +* ===================================================================== PROGRAM DBLAT3 * -* Test program for the DOUBLE PRECISION Level 3 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. 
The first 14 records -* of the file are read using list-directed input, the last 6 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 20 lines: -* 'DBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 1.3 VALUES OF BETA -* DGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* DSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* DTRMM T PUT F FOR NO TEST. SAME COLUMNS. -* DTRSM T PUT F FOR NO TEST. SAME COLUMNS. -* DSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. -* -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. 
INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 6 ) - DOUBLE PRECISION ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -96,7 +142,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -105,7 +151,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -182,14 +228,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 70 CONTINUE - IF( DDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 80 - EPS = HALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of DMMCH using exact data. @@ -1802,7 +1841,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, BETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1812,12 +1851,18 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA and BETA (eca) +* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. + DOUBLE PRECISION ONE, TWO + PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, BETA * .. Local Arrays .. 
@@ -1834,6 +1879,12 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA and BETA. +* + ALPHA = ONE + BETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM 10 INFOT = 1 CALL DGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) @@ -1963,16 +2014,16 @@ CALL DSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2660,7 +2711,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LDERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/sblat2.f b/blas/testing/sblat2.f index 057a85429..71605ed31 100644 --- a/blas/testing/sblat2.f +++ b/blas/testing/sblat2.f @@ -1,75 +1,121 @@ +*> \brief \b SBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM SBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the REAL Level 2 Blas. +*> +*> The program must be driven by a short data file. 
The first 18 records +*> of the file are read using list-directed input, the last 16 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 34 lines: +*> 'sblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 0.9 VALUES OF BETA +*> SGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SSBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SSPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> STBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> STPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> SGER T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYR T PUT F FOR NO TEST. SAME COLUMNS. +*> SSPR T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 
41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup single_blas_testing +* +* ===================================================================== PROGRAM SBLAT2 * -* Test program for the REAL Level 2 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 16 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 34 lines: -* 'SBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. 
-* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 0.9 VALUES OF BETA -* SGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* SGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* SSYMV T PUT F FOR NO TEST. SAME COLUMNS. -* SSBMV T PUT F FOR NO TEST. SAME COLUMNS. -* SSPMV T PUT F FOR NO TEST. SAME COLUMNS. -* STRMV T PUT F FOR NO TEST. SAME COLUMNS. -* STBMV T PUT F FOR NO TEST. SAME COLUMNS. -* STPMV T PUT F FOR NO TEST. SAME COLUMNS. -* STRSV T PUT F FOR NO TEST. SAME COLUMNS. -* STBSV T PUT F FOR NO TEST. SAME COLUMNS. -* STPSV T PUT F FOR NO TEST. SAME COLUMNS. -* SGER T PUT F FOR NO TEST. SAME COLUMNS. -* SSYR T PUT F FOR NO TEST. SAME COLUMNS. -* SSPR T PUT F FOR NO TEST. SAME COLUMNS. -* SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. -* SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. -* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. -* -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. 
INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 16 ) - REAL ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -121,7 +167,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -130,7 +176,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -235,14 +281,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 90 CONTINUE - IF( SDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 100 - EPS = HALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of SMVCH using exact data. @@ -2982,7 +3021,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LSERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/sblat3.f b/blas/testing/sblat3.f index 325a9eb92..879269633 100644 --- a/blas/testing/sblat3.f +++ b/blas/testing/sblat3.f @@ -1,55 +1,101 @@ +*> \brief \b SBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM SBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the REAL Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 6 records +*> are read using the format ( A6, L2 ). 
An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 20 lines: +*> 'sblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 1.3 VALUES OF BETA +*> SGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> STRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> STRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. 
+* +*> \date April 2012 +* +*> \ingroup single_blas_testing +* +* ===================================================================== PROGRAM SBLAT3 * -* Test program for the REAL Level 3 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 6 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 20 lines: -* 'SBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 1.3 VALUES OF BETA -* SGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* SSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* STRMM T PUT F FOR NO TEST. SAME COLUMNS. -* STRSM T PUT F FOR NO TEST. SAME COLUMNS. -* SSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. -* -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. 
-* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 6 ) - REAL ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -96,7 +142,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -105,7 +151,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -182,14 +228,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 70 CONTINUE - IF( SDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 80 - EPS = HALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of SMMCH using exact data. @@ -1802,7 +1841,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, BETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1812,12 +1851,18 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA and BETA (eca) +* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. 
+ REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) * .. Local Scalars .. REAL ALPHA, BETA * .. Local Arrays .. @@ -1834,6 +1879,12 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA and BETA. +* + ALPHA = ONE + BETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM 10 INFOT = 1 CALL SGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) @@ -1963,16 +2014,16 @@ CALL SSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2660,7 +2711,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LSERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/zblat1.f b/blas/testing/zblat1.f index e2415e1c4..d30112c63 100644 --- a/blas/testing/zblat1.f +++ b/blas/testing/zblat1.f @@ -1,7 +1,49 @@ +*> \brief \b ZBLAT1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZBLAT1 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX*16 Level 1 BLAS. 
+*> +*> Based upon the original BLAS test routine together with: +*> F06GAF Example Program Text +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16_blas_testing +* +* ===================================================================== PROGRAM ZBLAT1 -* Test program for the COMPLEX*16 Level 1 BLAS. -* Based upon the original BLAS test routine together with: -* F06GAF Example Program Text +* +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* ===================================================================== +* * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) @@ -114,8 +156,8 @@ + (5.0D0,6.0D0), (5.0D0,6.0D0), (0.1D0,0.1D0), + (-0.6D0,0.1D0), (0.1D0,-0.3D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (7.0D0,8.0D0), (0.3D0,0.1D0), (0.1D0,0.4D0), - + (0.4D0,0.1D0), (0.1D0,0.2D0), (2.0D0,3.0D0), + + (7.0D0,8.0D0), (0.3D0,0.1D0), (0.5D0,0.0D0), + + (0.0D0,0.5D0), (0.0D0,0.2D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), @@ -129,10 +171,10 @@ + (3.0D0,6.0D0), (-0.6D0,0.1D0), (4.0D0,7.0D0), + (0.1D0,-0.3D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.3D0,0.1D0), (5.0D0,8.0D0), - + (0.1D0,0.4D0), (6.0D0,9.0D0), (0.4D0,0.1D0), - + (8.0D0,3.0D0), (0.1D0,0.2D0), (9.0D0,4.0D0)/ - DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.7D0/ - DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.7D0/ + + (0.5D0,0.0D0), (6.0D0,9.0D0), (0.0D0,0.5D0), + + (8.0D0,3.0D0), (0.0D0,0.2D0), (9.0D0,4.0D0)/ + DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.8D0/ + DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.6D0/ DATA 
((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), @@ -145,8 +187,8 @@ + (0.11D0,-0.03D0), (-0.17D0,0.46D0), + (-0.17D0,-0.19D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (0.19D0,-0.17D0), (0.32D0,0.09D0), - + (0.23D0,-0.24D0), (0.18D0,0.01D0), + + (0.19D0,-0.17D0), (0.20D0,-0.35D0), + + (0.35D0,0.20D0), (0.14D0,0.08D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0), + (2.0D0,3.0D0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), @@ -162,9 +204,9 @@ + (-0.17D0,0.46D0), (4.0D0,7.0D0), + (-0.17D0,-0.19D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.19D0,-0.17D0), (5.0D0,8.0D0), - + (0.32D0,0.09D0), (6.0D0,9.0D0), - + (0.23D0,-0.24D0), (8.0D0,3.0D0), - + (0.18D0,0.01D0), (9.0D0,4.0D0)/ + + (0.20D0,-0.35D0), (6.0D0,9.0D0), + + (0.35D0,0.20D0), (8.0D0,3.0D0), + + (0.14D0,0.08D0), (9.0D0,4.0D0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), @@ -177,8 +219,8 @@ + (0.03D0,0.03D0), (-0.18D0,0.03D0), + (0.03D0,-0.09D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (0.09D0,0.03D0), (0.03D0,0.12D0), - + (0.12D0,0.03D0), (0.03D0,0.06D0), (2.0D0,3.0D0), + + (0.09D0,0.03D0), (0.15D0,0.00D0), + + (0.00D0,0.15D0), (0.00D0,0.06D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), @@ -193,8 +235,8 @@ + (-0.18D0,0.03D0), (4.0D0,7.0D0), + (0.03D0,-0.09D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.09D0,0.03D0), (5.0D0,8.0D0), - + (0.03D0,0.12D0), (6.0D0,9.0D0), (0.12D0,0.03D0), - + (8.0D0,3.0D0), (0.03D0,0.06D0), (9.0D0,4.0D0)/ + + (0.15D0,0.00D0), (6.0D0,9.0D0), (0.00D0,0.15D0), + + (8.0D0,3.0D0), (0.00D0,0.06D0), (9.0D0,4.0D0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. 
DO 60 INCX = 1, 2 @@ -529,7 +571,8 @@ * * .. Parameters .. INTEGER NOUT - PARAMETER (NOUT=6) + DOUBLE PRECISION ZERO + PARAMETER (NOUT=6, ZERO=0.0D0) * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN @@ -552,7 +595,7 @@ * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) - IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0D0) + IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). diff --git a/blas/testing/zblat2.f b/blas/testing/zblat2.f index e65cdcc70..53129a11e 100644 --- a/blas/testing/zblat2.f +++ b/blas/testing/zblat2.f @@ -1,68 +1,114 @@ +*> \brief \b ZBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX*16 Level 2 Blas. +*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 17 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 35 lines: +*> 'zblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. 
+*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZGERC T PUT F FOR NO TEST. SAME COLUMNS. +*> ZGERU T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHER T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHPR T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. 
+*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16_blas_testing +* +* ===================================================================== PROGRAM ZBLAT2 * -* Test program for the COMPLEX*16 Level 2 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 17 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 35 lines: -* 'ZBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. 
-* ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. -* ZGERC T PUT F FOR NO TEST. SAME COLUMNS. -* ZGERU T PUT F FOR NO TEST. SAME COLUMNS. -* ZHER T PUT F FOR NO TEST. SAME COLUMNS. -* ZHPR T PUT F FOR NO TEST. SAME COLUMNS. -* ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. -* ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. -* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. -* -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. INTEGER NIN @@ -72,8 +118,8 @@ COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) - DOUBLE PRECISION RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0D0, RHALF = 0.5D0, RONE = 1.0D0 ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -127,7 +173,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. 
@@ -136,7 +182,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -241,14 +287,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 90 CONTINUE - IF( DDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 100 - EPS = RHALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of ZMVCH using exact data. @@ -3087,7 +3126,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LZERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/blas/testing/zblat3.f b/blas/testing/zblat3.f index d6a522f2a..59ca24145 100644 --- a/blas/testing/zblat3.f +++ b/blas/testing/zblat3.f @@ -1,50 +1,97 @@ +*> \brief \b ZBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX*16 Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 9 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 23 lines: +*> 'zblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. 
+*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHERK T PUT F FOR NO TEST. SAME COLUMNS. +*> ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. +*> ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16_blas_testing +* +* ===================================================================== PROGRAM ZBLAT3 * -* Test program for the COMPLEX*16 Level 3 Blas. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- +* April 2012 * -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 9 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 23 lines: -* 'ZBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. -* ZHERK T PUT F FOR NO TEST. SAME COLUMNS. -* ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. -* ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. -* -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. 
INTEGER NIN @@ -54,8 +101,8 @@ COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) - DOUBLE PRECISION RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0D0, RHALF = 0.5D0, RONE = 1.0D0 ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -104,7 +151,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -113,7 +160,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -190,14 +237,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 70 CONTINUE - IF( DDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 80 - EPS = RHALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of ZMMCH using exact data. @@ -1949,7 +1989,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, RALPHA, BETA, RBETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1959,12 +1999,20 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) +* 3-19-92: Fix argument 12 in calls to ZSYMM and ZHEMM +* with INFOT = 9 (eca) +* 10-9-00: Declared INTRINSIC DCMPLX (susan) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. 
+ REAL ONE, TWO + PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. COMPLEX*16 ALPHA, BETA DOUBLE PRECISION RALPHA, RBETA @@ -1973,6 +2021,8 @@ * .. External Subroutines .. EXTERNAL ZGEMM, ZHEMM, ZHER2K, ZHERK, CHKXER, ZSYMM, $ ZSYR2K, ZSYRK, ZTRMM, ZTRSM +* .. Intrinsic Functions .. + INTRINSIC DCMPLX * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. @@ -1982,6 +2032,14 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA, BETA, RALPHA, and RBETA. +* + ALPHA = DCMPLX( ONE, -ONE ) + BETA = DCMPLX( TWO, -TWO ) + RALPHA = ONE + RBETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90 )ISNUM 10 INFOT = 1 @@ -2208,16 +2266,16 @@ CALL ZHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2275,16 +2333,16 @@ CALL ZSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) 
INFOT = 9 - CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -3274,7 +3332,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LZERES = .TRUE. GO TO 80 70 CONTINUE From 1d906d883df54c87d7a6c8418b6aa282be0f8556 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 30 Nov 2015 22:20:31 +0100 Subject: [PATCH 277/344] Fix degenerate cases in syrk and trsm --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 2 -- blas/level3_impl.h | 14 +++++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 94754bf66..229e96ceb 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -203,8 +203,6 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n const Index actual_l2 = 1572864; // == 1.5 MB #endif - - // Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2. // The second half is implicitly reserved to access the result and lhs coefficients. 
// When k #include "common.h" int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) @@ -133,6 +133,9 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, if(info) return xerbla_(SCALAR_SUFFIX_UP"TRSM ",&info,6); + if(*m==0 || *n==0) + return 0; + int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4); if(SIDE(*side)==LEFT) @@ -358,6 +361,9 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp else matrix(c, *n, *n, *ldc).triangularView() *= beta; } + if(*n==0 || *k==0) + return 0; + #if ISCOMPLEX // FIXME add support for symmetric complex matrix if(UPLO(*uplo)==UP) @@ -392,6 +398,8 @@ int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal Scalar alpha = *reinterpret_cast(palpha); Scalar beta = *reinterpret_cast(pbeta); +// std::cerr << "in syr2k " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << *ldb << " " << beta << " " << *ldc << "\n"; + int info = 0; if(UPLO(*uplo)==INVALID) info = 1; else if(OP(*op)==INVALID) info = 2; @@ -506,6 +514,8 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa // c = alpha*conj(a')*a + beta*c for op = 'C'or'c' int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) { +// std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; + typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&); static functype func[8]; @@ -577,6 +587,8 @@ int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal Scalar alpha = 
*reinterpret_cast(palpha); RealScalar beta = *pbeta; +// std::cerr << "in her2k " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << *ldb << " " << beta << " " << *ldc << "\n"; + int info = 0; if(UPLO(*uplo)==INVALID) info = 1; else if((OP(*op)==INVALID) || (OP(*op)==TR)) info = 2; From 844561939f931643df7d6e2387288bb0fe7b600f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 30 Nov 2015 22:36:14 +0100 Subject: [PATCH 278/344] Do not check NeedsToAlign if no static alignment --- test/dynalloc.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/test/dynalloc.cpp b/test/dynalloc.cpp index 3d895f2e0..6f22e1ab4 100644 --- a/test/dynalloc.cpp +++ b/test/dynalloc.cpp @@ -129,13 +129,6 @@ void test_dynalloc() for (int i=0; i() ); - CALL_SUBTEST(check_dynaligned() ); - CALL_SUBTEST(check_dynaligned() ); - CALL_SUBTEST(check_dynaligned() ); - CALL_SUBTEST(check_dynaligned() ); - CALL_SUBTEST(check_dynaligned() ); - CALL_SUBTEST( check_custom_new_delete() ); CALL_SUBTEST( check_custom_new_delete() ); CALL_SUBTEST( check_custom_new_delete() ); @@ -144,6 +137,16 @@ void test_dynalloc() // check static allocation, who knows ? #if EIGEN_MAX_STATIC_ALIGN_BYTES + for (int i=0; i() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + } + { MyStruct foo0; VERIFY(size_t(foo0.avec.data())%ALIGNMENT==0); MyClassA fooA; VERIFY(size_t(fooA.avec.data())%ALIGNMENT==0); From 6c02cbbb0f1786f5f22285342850250c28845272 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 1 Dec 2015 09:45:56 +0100 Subject: [PATCH 279/344] Fix matrix to quaternion (and angleaxis) conversion for matrix expression. 
--- Eigen/src/Geometry/Quaternion.h | 3 ++- test/geo_quaternion.cpp | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 56fa2bfbf..32e7e76fa 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -739,8 +739,9 @@ template struct quaternionbase_assign_impl { typedef typename Other::Scalar Scalar; - template static inline void run(QuaternionBase& q, const Other& mat) + template static inline void run(QuaternionBase& q, const Other& a_mat) { + const typename internal::nested_eval::type mat(a_mat); using std::sqrt; // This algorithm comes from "Quaternion Calculus and Fast Animation", // Ken Shoemake, 1987 SIGGRAPH course notes diff --git a/test/geo_quaternion.cpp b/test/geo_quaternion.cpp index 17229be4a..761bb52b4 100644 --- a/test/geo_quaternion.cpp +++ b/test/geo_quaternion.cpp @@ -49,6 +49,7 @@ template void quaternion(void) */ using std::abs; typedef Matrix Vector3; + typedef Matrix Matrix3; typedef Matrix Vector4; typedef Quaternion Quaternionx; typedef AngleAxis AngleAxisx; @@ -101,6 +102,11 @@ template void quaternion(void) q2 = q1.toRotationMatrix(); VERIFY_IS_APPROX(q1*v1,q2*v1); + Matrix3 rot1(q1); + VERIFY_IS_APPROX(q1*v1,rot1*v1); + Quaternionx q3(rot1.transpose()*rot1); + VERIFY_IS_APPROX(q3*v1,v1); + // angle-axis conversion AngleAxisx aa = AngleAxisx(q1); From 274b2272b77fd89bc4151f3ac5e7ccc5f0fad859 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 1 Dec 2015 09:57:31 +0100 Subject: [PATCH 280/344] Make bench_gemm compatible with 3.2 --- bench/bench_gemm.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bench/bench_gemm.cpp b/bench/bench_gemm.cpp index 0974ebe4c..8528c5587 100644 --- a/bench/bench_gemm.cpp +++ b/bench/bench_gemm.cpp @@ -203,9 +203,10 @@ int main(int argc, char ** argv) return 1; } - if(cache_size1>0) - setCpuCacheSizes(cache_size1,cache_size2,cache_size3); - +#if 
EIGEN_VERSION_AT_LEAST(3,2,90) + if(cache_size1>0) + setCpuCacheSizes(cache_size1,cache_size2,cache_size3); +#endif A a(m,p); a.setRandom(); B b(p,n); b.setRandom(); From 1663d15da7daf6cea77b6d0072849e77428db7a4 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Mon, 30 Nov 2015 13:39:24 -0800 Subject: [PATCH 281/344] Add internal method _solve_impl_transposed() to LU decomposition classes that solves A^T x = b or A^* x = b. --- Eigen/src/LU/FullPivLU.h | 90 ++++++++++++++++++++++++++++++++----- Eigen/src/LU/PartialPivLU.h | 45 +++++++++++++++---- test/lu.cpp | 36 +++++++++++++-- 3 files changed, 148 insertions(+), 23 deletions(-) diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 498df8adc..4691efd2f 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -10,7 +10,7 @@ #ifndef EIGEN_LU_H #define EIGEN_LU_H -namespace Eigen { +namespace Eigen { namespace internal { template struct traits > @@ -384,22 +384,26 @@ template class FullPivLU inline Index rows() const { return m_lu.rows(); } inline Index cols() const { return m_lu.cols(); } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; + + template + EIGEN_DEVICE_FUNC + void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const; #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + void computeInPlace(); - + MatrixType m_lu; PermutationPType m_p; PermutationQType m_q; @@ -447,15 +451,15 @@ template FullPivLU& FullPivLU::compute(const EigenBase& matrix) { check_template_parameters(); - + // the permutations are stored as int indices, so just to be sure: eigen_assert(matrix.rows()<=NumTraits::highest() && matrix.cols()<=NumTraits::highest()); - + m_isInitialized = true; m_lu = matrix.derived(); - + computeInPlace(); - + return *this; } @@ -709,7 +713,7 @@ struct image_retval > template template void FullPivLU<_MatrixType>::_solve_impl(const 
RhsType &rhs, DstType &dst) const -{ +{ /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. * So we proceed as follows: * Step 1: compute c = P * rhs. @@ -753,6 +757,70 @@ void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const for(Index i = nonzero_pivots; i < m_lu.cols(); ++i) dst.row(permutationQ().indices().coeff(i)).setZero(); } + +template +template +void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +{ + /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}, + * and since permutations are real and unitary, we can write this + * as A^T = Q U^T L^T P, + * So we proceed as follows: + * Step 1: compute c = Q^T rhs. + * Step 2: replace c by the solution x to U^T x = c. May or may not exist. + * Step 3: replace c by the solution x to L^T x = c. + * Step 4: result = P^T c. + * If Conjugate is true, replace "^T" by "^*" above. + */ + + const Index rows = this->rows(), cols = this->cols(), + nonzero_pivots = this->rank(); + eigen_assert(rhs.rows() == cols); + const Index smalldim = (std::min)(rows, cols); + + if(nonzero_pivots == 0) + { + dst.setZero(); + return; + } + + typename RhsType::PlainObject c(rhs.rows(), rhs.cols()); + + // Step 1 + c = permutationQ().inverse() * rhs; + + if (Conjugate) { + // Step 2 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView() + .adjoint() + .solveInPlace(c.topRows(nonzero_pivots)); + // Step 3 + m_lu.topLeftCorner(smalldim, smalldim) + .template triangularView() + .adjoint() + .solveInPlace(c.topRows(smalldim)); + } else { + // Step 2 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView() + .transpose() + .solveInPlace(c.topRows(nonzero_pivots)); + // Step 3 + m_lu.topLeftCorner(smalldim, smalldim) + .template triangularView() + .transpose() + .solveInPlace(c.topRows(smalldim)); + } + + // Step 4 + PermutationPType invp = permutationP().inverse().eval(); + for(Index i = 0; 
i < smalldim; ++i) + dst.row(invp.indices().coeff(i)) = c.row(i); + for(Index i = smalldim; i < rows; ++i) + dst.row(invp.indices().coeff(i)).setZero(); +} + #endif namespace internal { @@ -765,7 +833,7 @@ struct Assignment >, internal::assign_ typedef FullPivLU LuType; typedef Inverse SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) - { + { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 2c28818a3..91abbc341 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -11,7 +11,7 @@ #ifndef EIGEN_PARTIALLU_H #define EIGEN_PARTIALLU_H -namespace Eigen { +namespace Eigen { namespace internal { template struct traits > @@ -185,7 +185,7 @@ template class PartialPivLU inline Index rows() const { return m_lu.rows(); } inline Index cols() const { return m_lu.cols(); } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template EIGEN_DEVICE_FUNC @@ -206,17 +206,44 @@ template class PartialPivLU m_lu.template triangularView().solveInPlace(dst); // Step 3 - m_lu.template triangularView().solveInPlace(dst); + m_lu.template triangularView().solveInPlace(dst); + } + + template + EIGEN_DEVICE_FUNC + void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const { + /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. + * So we proceed as follows: + * Step 1: compute c = Pb. + * Step 2: replace c by the solution x to Lx = c. + * Step 3: replace c by the solution x to Ux = c. 
+ */ + + eigen_assert(rhs.rows() == m_lu.cols()); + + if (Conjugate) { + // Step 1 + dst = m_lu.template triangularView().adjoint().solve(rhs); + // Step 2 + m_lu.template triangularView().adjoint().solveInPlace(dst); + } else { + // Step 1 + dst = m_lu.template triangularView().transpose().solve(rhs); + // Step 2 + m_lu.template triangularView().transpose().solveInPlace(dst); + } + // Step 3 + dst = permutationP().transpose() * dst; } #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + MatrixType m_lu; PermutationType m_p; TranspositionType m_rowsTranspositions; @@ -295,7 +322,7 @@ struct partial_lu_impl { Index rrows = rows-k-1; Index rcols = cols-k-1; - + Index row_of_biggest_in_col; Score biggest_in_corner = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col); @@ -436,10 +463,10 @@ template PartialPivLU& PartialPivLU::compute(const EigenBase& matrix) { check_template_parameters(); - + // the row permutation is stored as int indices, so just to be sure: eigen_assert(matrix.rows()::highest()); - + m_lu = matrix.derived(); eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices"); @@ -492,7 +519,7 @@ struct Assignment >, internal::assi typedef PartialPivLU LuType; typedef Inverse SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) - { + { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; diff --git a/test/lu.cpp b/test/lu.cpp index b90367438..f753fc74a 100644 --- a/test/lu.cpp +++ b/test/lu.cpp @@ -92,6 +92,20 @@ template void lu_non_invertible() // test that the code, which does resize(), may be applied to an xpr m2.block(0,0,m2.rows(),m2.cols()) = lu.solve(m3); VERIFY_IS_APPROX(m3, m1*m2); + + // test solve with transposed + m3 = MatrixType::Random(rows,cols2); + m2 = m1.transpose()*m3; + m3 = MatrixType::Random(rows,cols2); + lu.template 
_solve_impl_transposed(m2, m3); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + + // test solve with conjugate transposed + m3 = MatrixType::Random(rows,cols2); + m2 = m1.adjoint()*m3; + m3 = MatrixType::Random(rows,cols2); + lu.template _solve_impl_transposed(m2, m3); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); } template void lu_invertible() @@ -124,6 +138,12 @@ template void lu_invertible() m2 = lu.solve(m3); VERIFY_IS_APPROX(m3, m1*m2); VERIFY_IS_APPROX(m2, lu.inverse()*m3); + // test solve with transposed + lu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.transpose()*m2); + // test solve with conjugate transposed + lu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.adjoint()*m2); // Regression test for Bug 302 MatrixType m4 = MatrixType::Random(size,size); @@ -136,14 +156,24 @@ template void lu_partial_piv() PartialPivLU.h */ typedef typename MatrixType::Index Index; - Index rows = internal::random(1,4); - Index cols = rows; + Index size = internal::random(1,4); - MatrixType m1(cols, rows); + MatrixType m1(size, size), m2(size, size), m3(size, size); m1.setRandom(); PartialPivLU plu(m1); VERIFY_IS_APPROX(m1, plu.reconstructedMatrix()); + + m3 = MatrixType::Random(size,size); + m2 = plu.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); + VERIFY_IS_APPROX(m2, plu.inverse()*m3); + // test solve with transposed + plu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.transpose()*m2); + // test solve with conjugate transposed + plu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.adjoint()*m2); } template void lu_verify_assert() From 0bb12fa61437e55ce563d076938593bebff7f0fc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 1 Dec 2015 14:38:47 +0100 Subject: [PATCH 282/344] Add LU::transpose().solve() and LU::adjoint().solve() API. 
--- Eigen/Core | 1 + Eigen/src/CholmodSupport/CholmodSupport.h | 4 + Eigen/src/Core/CoreEvaluators.h | 1 + Eigen/src/Core/Inverse.h | 24 ++-- Eigen/src/Core/Solve.h | 27 +++- Eigen/src/Core/SolverBase.h | 130 ++++++++++++++++++ Eigen/src/Core/Transpose.h | 2 +- Eigen/src/Core/util/Constants.h | 4 + Eigen/src/Core/util/ForwardDeclarations.h | 1 + .../BasicPreconditioners.h | 6 +- .../IncompleteCholesky.h | 5 +- .../IterativeLinearSolvers/IncompleteLUT.h | 8 +- .../IterativeSolverBase.h | 5 + Eigen/src/LU/FullPivLU.h | 16 +-- Eigen/src/LU/PartialPivLU.h | 27 ++-- Eigen/src/PaStiXSupport/PaStiXSupport.h | 4 + Eigen/src/SPQRSupport/SuiteSparseQRSupport.h | 4 + Eigen/src/SparseCholesky/SimplicialCholesky.h | 5 + Eigen/src/SparseLU/SparseLU.h | 5 + Eigen/src/SparseQR/SparseQR.h | 6 + Eigen/src/SuperLUSupport/SuperLUSupport.h | 4 + Eigen/src/UmfPackSupport/UmfPackSupport.h | 4 + test/lu.cpp | 22 +++ 23 files changed, 267 insertions(+), 48 deletions(-) create mode 100644 Eigen/src/Core/SolverBase.h diff --git a/Eigen/Core b/Eigen/Core index 7cf431320..1ec749452 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -391,6 +391,7 @@ using std::ptrdiff_t; #include "src/Core/GeneralProduct.h" #include "src/Core/Solve.h" #include "src/Core/Inverse.h" +#include "src/Core/SolverBase.h" #include "src/Core/PermutationMatrix.h" #include "src/Core/Transpositions.h" #include "src/Core/TriangularMatrix.h" diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index f33aa9bf1..06421d5ed 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -170,6 +170,10 @@ class CholmodBase : public SparseSolverBase typedef typename MatrixType::RealScalar RealScalar; typedef MatrixType CholMatrixType; typedef typename MatrixType::StorageIndex StorageIndex; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: diff --git 
a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index a8b359085..42ad452f7 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -29,6 +29,7 @@ struct storage_kind_to_evaluator_kind { template struct storage_kind_to_shape; template<> struct storage_kind_to_shape { typedef DenseShape Shape; }; +template<> struct storage_kind_to_shape { typedef SolverShape Shape; }; template<> struct storage_kind_to_shape { typedef PermutationShape Shape; }; template<> struct storage_kind_to_shape { typedef TranspositionsShape Shape; }; diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h index 8ba1a12d9..f3ec84990 100644 --- a/Eigen/src/Core/Inverse.h +++ b/Eigen/src/Core/Inverse.h @@ -48,6 +48,7 @@ public: typedef typename internal::ref_selector::type XprTypeNested; typedef typename internal::remove_all::type XprTypeNestedCleaned; typedef typename internal::ref_selector::type Nested; + typedef typename internal::remove_all::type NestedExpression; explicit Inverse(const XprType &xpr) : m_xpr(xpr) @@ -62,25 +63,16 @@ protected: XprTypeNested m_xpr; }; -/** \internal - * Specialization of the Inverse expression for dense expressions. - * Direct access to the coefficients are discared. - * FIXME this intermediate class is probably not needed anymore. 
- */ -template -class InverseImpl - : public MatrixBase > +// Generic API dispatcher +template +class InverseImpl + : public internal::generic_xpr_base >::type { - typedef Inverse Derived; - public: - - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) - typedef typename internal::remove_all::type NestedExpression; - + typedef typename internal::generic_xpr_base >::type Base; + typedef typename XprType::Scalar Scalar; private: - + Scalar coeff(Index row, Index col) const; Scalar coeff(Index i) const; }; diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index 2d163fe2a..ba2ee53b8 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -34,12 +34,11 @@ template struct s template struct solve_traits { - typedef typename Decomposition::MatrixType MatrixType; typedef Matrix PlainObject; }; @@ -145,6 +144,28 @@ struct Assignment, internal::assign_op +struct Assignment,RhsType>, internal::assign_op, Dense2Dense, Scalar> +{ + typedef Solve,RhsType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + src.dec().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); + } +}; + +// Specialization for "dst = dec.adjoint().solve(rhs)" +template +struct Assignment, const Transpose >,RhsType>, internal::assign_op, Dense2Dense, Scalar> +{ + typedef Solve, const Transpose >,RhsType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); + } +}; + } // end namepsace internal } // end namespace Eigen diff --git a/Eigen/src/Core/SolverBase.h b/Eigen/src/Core/SolverBase.h new file mode 100644 index 000000000..8a4adc229 --- /dev/null +++ b/Eigen/src/Core/SolverBase.h @@ -0,0 +1,130 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SOLVERBASE_H +#define EIGEN_SOLVERBASE_H + +namespace Eigen { + +namespace internal { + + + +} // end namespace internal + +/** \class SolverBase + * \brief A base class for matrix decomposition and solvers + * + * \tparam Derived the actual type of the decomposition/solver. + * + * Any matrix decomposition inheriting this base class provide the following API: + * + * \code + * MatrixType A, b, x; + * DecompositionType dec(A); + * x = dec.solve(b); // solve A * x = b + * x = dec.transpose().solve(b); // solve A^T * x = b + * x = dec.adjoint().solve(b); // solve A' * x = b + * \endcode + * + * \warning Currently, any other usage of transpose() and adjoint() are not supported and will produce compilation errors. + * + * \sa class PartialPivLU, class FullPivLU + */ +template +class SolverBase : public EigenBase +{ + public: + + typedef EigenBase Base; + typedef typename internal::traits::Scalar Scalar; + typedef Scalar CoeffReturnType; + + enum { + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, + internal::traits::ColsAtCompileTime>::ret), + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + MaxSizeAtCompileTime = (internal::size_at_compile_time::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime>::ret), + IsVectorAtCompileTime = internal::traits::MaxRowsAtCompileTime == 1 + || internal::traits::MaxColsAtCompileTime == 1 + }; + + /** Default constructor */ + SolverBase() + {} + + ~SolverBase() + {} + + using Base::derived; + + /** \returns an expression of the solution x of 
\f$ A x = b \f$ using the current decomposition of A. + */ + template + inline const Solve + solve(const MatrixBase& b) const + { + eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b"); + return Solve(derived(), b.derived()); + } + + /** \internal the return type of transpose() */ + typedef typename internal::add_const >::type ConstTransposeReturnType; + /** \returns an expression of the transposed of the factored matrix. + * + * A typical usage is to solve for the transposed problem A^T x = b: + * \code x = dec.transpose().solve(b); \endcode + * + * \sa adjoint(), solve() + */ + inline ConstTransposeReturnType transpose() const + { + return ConstTransposeReturnType(derived()); + } + + /** \internal the return type of adjoint() */ + typedef typename internal::conditional::IsComplex, + CwiseUnaryOp, ConstTransposeReturnType>, + ConstTransposeReturnType + >::type AdjointReturnType; + /** \returns an expression of the adjoint of the factored matrix + * + * A typical usage is to solve for the adjoint problem A' x = b: + * \code x = dec.adjoint().solve(b); \endcode + * + * For real scalar types, this function is equivalent to transpose(). + * + * \sa transpose(), solve() + */ + inline AdjointReturnType adjoint() const + { + return AdjointReturnType(derived().transpose()); + } + + protected: +}; + +namespace internal { + +template +struct generic_xpr_base +{ + typedef SolverBase type; + +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SOLVERBASE_H diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 2152405d5..5b66eb5e1 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -39,7 +39,7 @@ struct traits > : public traits MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime, FlagsLvalueBit = is_lvalue::value ? 
LvalueBit : 0, - Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit), + Flags0 = traits::Flags & ~(LvalueBit | NestByRefBit), Flags1 = Flags0 | FlagsLvalueBit, Flags = Flags1 ^ RowMajorBit, InnerStrideAtCompileTime = inner_stride_at_compile_time::ret, diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 28852c8c3..a364f48d1 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -492,6 +492,9 @@ struct Dense {}; /** The type used to identify a general sparse storage. */ struct Sparse {}; +/** The type used to identify a general solver (foctored) storage. */ +struct SolverStorage {}; + /** The type used to identify a permutation storage. */ struct PermutationStorage {}; @@ -506,6 +509,7 @@ struct ArrayXpr {}; // An evaluator must define its shape. By default, it can be one of the following: struct DenseShape { static std::string debugName() { return "DenseShape"; } }; +struct SolverShape { static std::string debugName() { return "SolverShape"; } }; struct HomogeneousShape { static std::string debugName() { return "HomogeneousShape"; } }; struct DiagonalShape { static std::string debugName() { return "DiagonalShape"; } }; struct BandShape { static std::string debugName() { return "BandShape"; } }; diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 1aa81abf8..483af876f 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -132,6 +132,7 @@ template struct CommaInitializer; template class ReturnByValue; template class ArrayWrapper; template class MatrixWrapper; +template class SolverBase; template class InnerIterator; namespace internal { diff --git a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index b850630a3..358444aff 100644 --- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ 
b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -39,8 +39,10 @@ class DiagonalPreconditioner typedef Matrix Vector; public: typedef typename Vector::StorageIndex StorageIndex; - // this typedef is only to export the scalar type and compile-time dimensions to solve_retval - typedef Matrix MatrixType; + enum { + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic + }; DiagonalPreconditioner() : m_isInitialized(false) {} diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h index 8f549af82..284e37f13 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h @@ -57,12 +57,15 @@ class IncompleteCholesky : public SparseSolverBase FactorType; - typedef FactorType MatrixType; typedef Matrix VectorSx; typedef Matrix VectorRx; typedef Matrix VectorIx; typedef std::vector > VectorList; enum { UpLo = _UpLo }; + enum { + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic + }; public: /** Default constructor leaving the object in a partly non-initialized stage. 
diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index 519472377..338e6f10a 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h @@ -109,11 +109,13 @@ class IncompleteLUT : public SparseSolverBase VectorI; typedef SparseMatrix FactorType; + enum { + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic + }; + public: - // this typedef is only to export the scalar type and compile-time dimensions to solve_retval - typedef Matrix MatrixType; - IncompleteLUT() : m_droptol(NumTraits::dummy_precision()), m_fillfactor(10), m_analysisIsOk(false), m_factorizationIsOk(false) diff --git a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h index 5f4bcea11..e51ff7280 100644 --- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h @@ -31,6 +31,11 @@ public: typedef typename MatrixType::StorageIndex StorageIndex; typedef typename MatrixType::RealScalar RealScalar; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + public: using Base::derived; diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 4691efd2f..0c4d63923 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -16,6 +16,8 @@ namespace internal { template struct traits > : traits<_MatrixType> { + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; enum { Flags = 0 }; }; @@ -53,21 +55,18 @@ template struct traits > * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse() */ template class FullPivLU + : public SolverBase > { public: typedef _MatrixType MatrixType; + typedef SolverBase Base; + + EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU) + // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int enum { - 
RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef typename internal::traits::StorageKind StorageKind; - // FIXME should be int - typedef typename MatrixType::StorageIndex StorageIndex; typedef typename internal::plain_row_type::type IntRowVectorType; typedef typename internal::plain_col_type::type IntColVectorType; typedef PermutationMatrix PermutationQType; @@ -223,6 +222,7 @@ template class FullPivLU * * \sa TriangularView::solve(), kernel(), inverse() */ + // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion. template inline const Solve solve(const MatrixBase& b) const diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index 91abbc341..50e920609 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -17,6 +17,8 @@ namespace internal { template struct traits > : traits<_MatrixType> { + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; typedef traits<_MatrixType> BaseTraits; enum { Flags = BaseTraits::Flags & RowMajorBit, @@ -58,33 +60,29 @@ template struct traits > * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU */ template class PartialPivLU + : public SolverBase > { public: typedef _MatrixType MatrixType; + typedef SolverBase Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU) + // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = 
MatrixType::MaxColsAtCompileTime }; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef typename internal::traits::StorageKind StorageKind; - // FIXME should be int - typedef typename MatrixType::StorageIndex StorageIndex; typedef PermutationMatrix PermutationType; typedef Transpositions TranspositionType; typedef typename MatrixType::PlainObject PlainObject; /** - * \brief Default Constructor. - * - * The default constructor is useful in cases in which the user intends to - * perform decompositions via PartialPivLU::compute(const MatrixType&). - */ + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via PartialPivLU::compute(const MatrixType&). + */ PartialPivLU(); /** \brief Default Constructor with memory preallocation @@ -145,6 +143,7 @@ template class PartialPivLU * * \sa TriangularView::solve(), inverse(), computeInverse() */ + // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion. 
template inline const Solve solve(const MatrixBase& b) const @@ -508,7 +507,7 @@ MatrixType PartialPivLU::reconstructedMatrix() const return res; } -/***** Implementation of solve() *****************************************************/ +/***** Implementation details *****************************************************/ namespace internal { diff --git a/Eigen/src/PaStiXSupport/PaStiXSupport.h b/Eigen/src/PaStiXSupport/PaStiXSupport.h index cec4149e7..1999fd289 100644 --- a/Eigen/src/PaStiXSupport/PaStiXSupport.h +++ b/Eigen/src/PaStiXSupport/PaStiXSupport.h @@ -141,6 +141,10 @@ class PastixBase : public SparseSolverBase typedef typename MatrixType::StorageIndex StorageIndex; typedef Matrix Vector; typedef SparseMatrix ColSpMatrix; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: diff --git a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h index d2f053fa5..d9c3113e7 100644 --- a/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +++ b/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h @@ -68,6 +68,10 @@ class SPQR : public SparseSolverBase > typedef SuiteSparse_long StorageIndex ; typedef SparseMatrix MatrixType; typedef Map > PermutationType; + enum { + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic + }; public: SPQR() : m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits::epsilon()), m_useDefaultThreshold(true) diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky.h b/Eigen/src/SparseCholesky/SimplicialCholesky.h index ef612cf45..1343eb15c 100644 --- a/Eigen/src/SparseCholesky/SimplicialCholesky.h +++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h @@ -71,6 +71,11 @@ class SimplicialCholeskyBase : public SparseSolverBase typedef Matrix VectorType; typedef Matrix VectorI; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + 
public: using Base::derived; diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 73368cba4..acd3ad100 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -90,6 +90,11 @@ class SparseLU : public SparseSolverBase >, typedef Matrix IndexVector; typedef PermutationMatrix PermutationType; typedef internal::SparseLUImpl Base; + + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index bbd337c40..4f26c19ca 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -84,6 +84,12 @@ class SparseQR : public SparseSolverBase > typedef Matrix IndexVector; typedef Matrix ScalarVector; typedef PermutationMatrix PermutationType; + + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + public: SparseQR () : m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) { } diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index c145e25bd..b20da37f7 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -304,6 +304,10 @@ class SuperLUBase : public SparseSolverBase typedef Matrix IntColVectorType; typedef Map > PermutationMap; typedef SparseMatrix LUMatrixType; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h index caac082f3..aaec8c6f1 100644 --- a/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -146,6 
+146,10 @@ class UmfPackLU : public SparseSolverBase > typedef SparseMatrix LUMatrixType; typedef SparseMatrix UmfpackMatrixType; typedef Ref UmfpackMatrixRef; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: diff --git a/test/lu.cpp b/test/lu.cpp index f753fc74a..f14435114 100644 --- a/test/lu.cpp +++ b/test/lu.cpp @@ -99,6 +99,9 @@ template void lu_non_invertible() m3 = MatrixType::Random(rows,cols2); lu.template _solve_impl_transposed(m2, m3); VERIFY_IS_APPROX(m2, m1.transpose()*m3); + m3 = MatrixType::Random(rows,cols2); + m3 = lu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); // test solve with conjugate transposed m3 = MatrixType::Random(rows,cols2); @@ -106,6 +109,9 @@ template void lu_non_invertible() m3 = MatrixType::Random(rows,cols2); lu.template _solve_impl_transposed(m2, m3); VERIFY_IS_APPROX(m2, m1.adjoint()*m3); + m3 = MatrixType::Random(rows,cols2); + m3 = lu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); } template void lu_invertible() @@ -138,12 +144,20 @@ template void lu_invertible() m2 = lu.solve(m3); VERIFY_IS_APPROX(m3, m1*m2); VERIFY_IS_APPROX(m2, lu.inverse()*m3); + // test solve with transposed lu.template _solve_impl_transposed(m3, m2); VERIFY_IS_APPROX(m3, m1.transpose()*m2); + m3 = MatrixType::Random(size,size); + m3 = lu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + // test solve with conjugate transposed lu.template _solve_impl_transposed(m3, m2); VERIFY_IS_APPROX(m3, m1.adjoint()*m2); + m3 = MatrixType::Random(size,size); + m3 = lu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); // Regression test for Bug 302 MatrixType m4 = MatrixType::Random(size,size); @@ -168,12 +182,20 @@ template void lu_partial_piv() m2 = plu.solve(m3); VERIFY_IS_APPROX(m3, m1*m2); VERIFY_IS_APPROX(m2, plu.inverse()*m3); + // test solve with transposed plu.template _solve_impl_transposed(m3, m2); 
VERIFY_IS_APPROX(m3, m1.transpose()*m2); + m3 = MatrixType::Random(size,size); + m3 = plu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + // test solve with conjugate transposed plu.template _solve_impl_transposed(m3, m2); VERIFY_IS_APPROX(m3, m1.adjoint()*m2); + m3 = MatrixType::Random(size,size); + m3 = plu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); } template void lu_verify_assert() From c5b86893e7cca7d870b928ad8cc109ccc131b456 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 1 Dec 2015 14:45:08 +0100 Subject: [PATCH 283/344] bug #1123: add missing documentation of angle() and axis() --- Eigen/src/Geometry/AngleAxis.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Eigen/src/Geometry/AngleAxis.h b/Eigen/src/Geometry/AngleAxis.h index a5484ba77..7fdb8ae83 100644 --- a/Eigen/src/Geometry/AngleAxis.h +++ b/Eigen/src/Geometry/AngleAxis.h @@ -85,10 +85,17 @@ public: template inline explicit AngleAxis(const MatrixBase& m) { *this = m; } + /** \returns the value of the rotation angle in radian */ Scalar angle() const { return m_angle; } + /** \returns a read-write reference to the stored angle in radian */ Scalar& angle() { return m_angle; } + /** \returns the rotation axis */ const Vector3& axis() const { return m_axis; } + /** \returns a read-write reference to the stored rotation axis. + * + * \warning The rotation axis must remain a \b unit vector. + */ Vector3& axis() { return m_axis; } /** Concatenates two rotations */ From d2d4c45d55afc7294baa292dd6fc0440dee0a04b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 2 Dec 2015 17:21:33 -0800 Subject: [PATCH 284/344] Made it possible to leverage several binary functor in a CUDA kernel Explicitely specified the return type of the various scalar_cmp_op functors. 
--- Eigen/src/Core/functors/BinaryFunctors.h | 29 +++++++++++++++--------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 90d8b7d1d..4962d625c 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -29,7 +29,7 @@ template struct scalar_sum_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::padd(a,b); } template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const { return internal::predux(a); } }; template @@ -68,7 +68,7 @@ template struct scalar_product_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmul(a,b); } template - EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux_mul(a); } }; template @@ -175,30 +175,37 @@ struct result_of(Scalar,Scalar)> { template struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} }; template struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} }; template struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const 
{return a>b;} }; template struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;} }; template struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} }; template struct scalar_cmp_op { + typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} }; @@ -448,9 +455,9 @@ struct functor_traits > */ template struct scalar_sub_op { - inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { } - inline scalar_sub_op(const Scalar& other) : m_other(other) { } - inline Scalar operator() (const Scalar& a) const { return a - m_other; } + EIGEN_DEVICE_FUNC inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { } + EIGEN_DEVICE_FUNC inline scalar_sub_op(const Scalar& other) : m_other(other) { } + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a - m_other; } template EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const { return internal::psub(a, pset1(m_other)); } @@ -466,9 +473,9 @@ struct functor_traits > */ template struct scalar_rsub_op { - inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { } - inline scalar_rsub_op(const Scalar& other) : m_other(other) { } - inline Scalar operator() (const Scalar& a) const { return m_other - a; } + EIGEN_DEVICE_FUNC inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { } + EIGEN_DEVICE_FUNC inline scalar_rsub_op(const Scalar& other) : m_other(other) { } + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other - a; } template EIGEN_DEVICE_FUNC inline const Packet 
packetOp(const Packet& a) const { return internal::psub(pset1(m_other), a); } @@ -485,8 +492,8 @@ struct functor_traits > template struct scalar_pow_op { // FIXME default copy constructors seems bugged with std::complex<> - inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } - inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} + EIGEN_DEVICE_FUNC inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } + EIGEN_DEVICE_FUNC inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } const Scalar m_exponent; @@ -501,7 +508,7 @@ struct functor_traits > */ template struct scalar_inverse_mult_op { - scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} + EIGEN_DEVICE_FUNC scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; } template EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const From 944647c0aac7e14fb4cb50cf022c21e1940ac212 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov Date: Thu, 3 Dec 2015 15:21:43 +0000 Subject: [PATCH 285/344] Fixes internal compiler error while compiling with VC2015 Update1 x64. 
--- Eigen/src/Core/products/GeneralMatrixMatrix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 3fba82ff8..d830dfb96 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -149,7 +149,7 @@ static void run(Index rows, Index cols, Index depth, { for(Index i=0; i Date: Thu, 3 Dec 2015 22:25:26 +0100 Subject: [PATCH 286/344] Add missing Rotation2D::operator=(Matrix2x2) --- Eigen/src/Geometry/Rotation2D.h | 21 +++++++++++++++++++++ test/geo_transformations.cpp | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h index 65aa83be5..8b0ddcfb0 100644 --- a/Eigen/src/Geometry/Rotation2D.h +++ b/Eigen/src/Geometry/Rotation2D.h @@ -64,6 +64,16 @@ public: /** Default constructor wihtout initialization. The represented rotation is undefined. */ Rotation2D() {} + /** Construct a 2D rotation from a 2x2 rotation matrix \a mat. + * + * \sa fromRotationMatrix() + */ + template + explicit Rotation2D(const MatrixBase& m) + { + fromRotationMatrix(m.derived()); + } + /** \returns the rotation angle */ inline Scalar angle() const { return m_angle; } @@ -103,6 +113,17 @@ public: Rotation2D& fromRotationMatrix(const MatrixBase& m); Matrix2 toRotationMatrix() const; + /** Set \c *this from a 2x2 rotation matrix \a mat. + * In other words, this function extract the rotation angle from the rotation matrix. + * + * This method is an alias for fromRotationMatrix() + * + * \sa fromRotationMatrix() + */ + template + Rotation2D& operator=(const MatrixBase& m) + { return fromRotationMatrix(m.derived()); } + /** \returns the spherical interpolation between \c *this and \a other using * parameter \a t. It is in fact equivalent to a linear interpolation. 
*/ diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp index 94ed155ef..51f90036d 100644 --- a/test/geo_transformations.cpp +++ b/test/geo_transformations.cpp @@ -430,6 +430,10 @@ template void transformations() VERIFY( rot2.smallestAngle() >= -Scalar(EIGEN_PI) ); VERIFY( rot2.smallestAngle() <= Scalar(EIGEN_PI) ); VERIFY_IS_APPROX( angleToVec(rot2.smallestAngle()), angleToVec(rot2.angle()) ); + + Matrix rot2_as_mat(rot2); + Rotation2D rot3(rot2_as_mat); + VERIFY_IS_APPROX( angleToVec(rot2.smallestAngle()), angleToVec(rot3.angle()) ); } s0 = internal::random(-100,100); From 029052d276edab49f6e8c274972d51f4188b5a09 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 3 Dec 2015 17:08:47 -0800 Subject: [PATCH 287/344] Deleted redundant code --- .../Eigen/CXX11/src/Tensor/TensorDevice.h | 86 ------------------- 1 file changed, 86 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h index 7b2485fb7..29e50a3b2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -63,92 +63,6 @@ template class TensorDevice { ExpressionType& m_expression; }; - -#ifdef EIGEN_USE_THREADS -template class TensorDevice { - public: - TensorDevice(const ThreadPoolDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} - - template - EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { - typedef TensorAssignOp Assign; - Assign assign(m_expression, other); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - template - EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Sum; - Sum sum(m_expression, other); - typedef TensorAssignOp Assign; - Assign assign(m_expression, sum); - internal::TensorExecutor::run(assign, 
m_device); - return *this; - } - - template - EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; - Difference difference(m_expression, other); - typedef TensorAssignOp Assign; - Assign assign(m_expression, difference); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - protected: - const ThreadPoolDevice& m_device; - ExpressionType& m_expression; -}; -#endif - - -#if defined(EIGEN_USE_GPU) -template class TensorDevice -{ - public: - TensorDevice(const GpuDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} - - template - EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { - typedef TensorAssignOp Assign; - Assign assign(m_expression, other); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - template - EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Sum; - Sum sum(m_expression, other); - typedef TensorAssignOp Assign; - Assign assign(m_expression, sum); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - template - EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; - Difference difference(m_expression, other); - typedef TensorAssignOp Assign; - Assign assign(m_expression, difference); - internal::TensorExecutor::run(assign, m_device); - return *this; - } - - protected: - const GpuDevice& m_device; - ExpressionType& m_expression; -}; -#endif - - } // end namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H From e25e3a041bc41266df31f48a441d95b38fc5a6fc Mon Sep 17 00:00:00 2001 
From: Benoit Steiner Date: Thu, 3 Dec 2015 18:16:35 -0800 Subject: [PATCH 288/344] Added rsqrt() method to the Array class: this method computes the coefficient-wise inverse square root much more efficiently than calling sqrt().inverse(). --- Eigen/src/plugins/ArrayCwiseUnaryOps.h | 17 +++++++++++++++++ test/array.cpp | 5 +++++ 2 files changed, 22 insertions(+) diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index a9310f12d..45e826b0c 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -4,6 +4,7 @@ typedef CwiseUnaryOp, const Derived> AbsReturnTy typedef CwiseUnaryOp, const Derived> ArgReturnType; typedef CwiseUnaryOp, const Derived> Abs2ReturnType; typedef CwiseUnaryOp, const Derived> SqrtReturnType; +typedef CwiseUnaryOp, const Derived> RsqrtReturnType; typedef CwiseUnaryOp, const Derived> SignReturnType; typedef CwiseUnaryOp, const Derived> InverseReturnType; typedef CwiseUnaryOp, const Derived> BooleanNotReturnType; @@ -139,6 +140,22 @@ sqrt() const return SqrtReturnType(derived()); } +/** \returns an expression of the coefficient-wise inverse square root of *this. + * + * This function computes the coefficient-wise inverse square root. + * + * Example: \include Cwise_sqrt.cpp + * Output: \verbinclude Cwise_sqrt.out + * + * \sa pow(), square() + */ +EIGEN_DEVICE_FUNC +inline const RsqrtReturnType +rsqrt() const +{ + return RsqrtReturnType(derived()); +} + /** \returns an expression of the coefficient-wise signum of *this. * * This function computes the coefficient-wise signum. 
diff --git a/test/array.cpp b/test/array.cpp index 367bda2c4..5395721f5 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -236,6 +236,7 @@ template void array_real(const ArrayType& m) // avoid NaNs with abs() so verification doesn't fail m3 = m1.abs(); VERIFY_IS_APPROX(m3.sqrt(), sqrt(abs(m1))); + VERIFY_IS_APPROX(m3.rsqrt(), Scalar(1)/sqrt(abs(m1))); VERIFY_IS_APPROX(m3.log(), log(m3)); VERIFY_IS_APPROX(m3.log10(), log10(m3)); @@ -292,6 +293,10 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m3.pow(RealScalar(0.5)), m3.sqrt()); VERIFY_IS_APPROX(pow(m3,RealScalar(0.5)), m3.sqrt()); + + VERIFY_IS_APPROX(m3.pow(RealScalar(-0.5)), m3.rsqrt()); + VERIFY_IS_APPROX(pow(m3,RealScalar(-0.5)), m3.rsqrt()); + VERIFY_IS_APPROX(log10(m3), log(m3)/log(10)); // scalar by array division From d20efc974d53f513668c0138e945171f6f4d85e8 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 4 Dec 2015 09:38:15 -0800 Subject: [PATCH 289/344] Made it possible to use the sigmoid functor within a CUDA kernel. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 7ba0c2817..34ba4e392 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -40,8 +40,8 @@ struct scalar_sigmoid_op { return one / (one + std::exp(-x)); } - template - inline Packet packetOp(const Packet& x) const { + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(const Packet& x) const { const Packet one = pset1(1); return pdiv(one, padd(one, pexp(pnegate(x)))); } From 490d26e4c14554716298c3bc4123571bbf92a1b2 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 4 Dec 2015 10:15:11 -0800 Subject: [PATCH 290/344] Use integers instead of std::size_t to encode the number of dimensions in the Tensor class since most of the code currently already use integers. --- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 12 ++++++------ unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 6 +++--- .../CXX11/src/Tensor/TensorForwardDeclarations.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h | 10 +++++----- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 0df1345c2..6d357545c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -59,7 +59,7 @@ namespace Eigen { * \ref TopicStorageOrders */ -template +template class Tensor : public TensorBase > { public: @@ -82,7 +82,7 @@ class Tensor : public TensorBase Dimensions; protected: @@ -433,7 +433,7 @@ class Tensor : public TensorBase& dimensions) { - std::size_t i; + int i; Index size = Index(1); for (i = 0; i < NumIndices; i++) { internal::check_rows_cols_for_overflow::run(size, dimensions[i]); @@ -451,7 +451,7 @@ class Tensor : public 
TensorBase& dimensions) { array dims; - for (std::size_t i = 0; i < NumIndices; ++i) { + for (int i = 0; i < NumIndices; ++i) { dims[i] = dimensions[i]; } resize(dims); @@ -480,7 +480,7 @@ class Tensor : public TensorBase& dimensions) { array dims; - for (std::size_t i = 0; i < NumIndices; ++i) { + for (int i = 0; i < NumIndices; ++i) { dims[i] = static_cast(dimensions[i]); } resize(dims); @@ -490,7 +490,7 @@ class Tensor : public TensorBase& dimensions) { array dims; - for (std::size_t i = 0; i < NumIndices; ++i) { + for (int i = 0; i < NumIndices; ++i) { dims[i] = static_cast(dimensions[i]); } resize(dims); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 5ec1deaf8..d1ce3d0ed 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -638,8 +638,8 @@ class TensorBase } protected: - template friend class Tensor; - template friend class TensorFixedSize; + template friend class Tensor; + template friend class TensorFixedSize; template friend class TensorBase; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } @@ -655,7 +655,7 @@ class TensorBase : public TensorBase::type PacketReturnType; static const int NumDimensions = DerivedTraits::NumDimensions; - template friend class Tensor; + template friend class Tensor; template friend class TensorFixedSize; template friend class TensorBase; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index fbeb9c59a..a8bd8b888 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -12,7 +12,7 @@ namespace Eigen { -template class Tensor; +template class Tensor; template class TensorFixedSize; template class TensorMap; template class TensorRef; diff --git 
a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 8f1c02ea4..7a9568b36 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -44,7 +44,7 @@ class compute_tensor_flags }; -template +template struct traits > { typedef Scalar_ Scalar; @@ -107,13 +107,13 @@ struct traits > }; -template +template struct eval, Eigen::Dense> { typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type; }; -template +template struct eval, Eigen::Dense> { typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type; @@ -161,13 +161,13 @@ template struct nested typedef typename ref_selector::type type; }; -template +template struct nested > { typedef const Tensor& type; }; -template +template struct nested > { typedef const Tensor& type; From f4ca8ad9178b5fa1b83697e1a645e55d65df5639 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 4 Dec 2015 18:14:16 -0800 Subject: [PATCH 291/344] Use signed integers instead of unsigned ones more consistently in the codebase. 
--- .../Eigen/CXX11/src/Tensor/TensorContraction.h | 2 +- .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 12 ++++++------ unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index fa62b25c2..9e208934b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -495,7 +495,7 @@ struct TensorContractionEvaluatorBase internal::array_size::Dimensions>::value; static const int RDims = internal::array_size::Dimensions>::value; - static const unsigned int ContractDims = internal::array_size::value; + static const int ContractDims = internal::array_size::value; static const int NumDims = max_n_1::size; typedef array left_dim_mapper_t; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 114871f49..f3c9a3148 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -265,10 +265,10 @@ struct tensor_index_linearization_helper // Dynamic size -template +template struct DSizes : array { typedef array Base; - static const std::size_t count = NumDims; + static const int count = NumDims; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { return NumDims; @@ -278,8 +278,8 @@ struct DSizes : array { return internal::array_prod(*static_cast(this)); } - EIGEN_DEVICE_FUNC DSizes() { - for (std::size_t i = 0 ; i < NumDims; ++i) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() { + for (int i = 0 ; i < NumDims; ++i) { (*this)[i] = 0; } } @@ -369,10 +369,10 @@ struct tensor_vsize_index_linearization_helper namespace internal { -template struct array_size > { +template struct array_size > { static const size_t value = NumDims; }; -template struct array_size > { +template struct array_size 
> { static const size_t value = NumDims; }; #ifndef EIGEN_EMULATE_CXX11_META_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h index ee6f14b8f..98631fc7f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h @@ -63,7 +63,7 @@ class TensorStorage // pure dynamic -template +template class TensorStorage, Options_> { public: From b37036afce20e902cd5191a2a985f39b1f7e22e3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 7 Dec 2015 12:23:22 +0100 Subject: [PATCH 292/344] Implement wrapper for matrix-free iterative solvers --- Eigen/src/Core/util/Meta.h | 1 - Eigen/src/Core/util/StaticAssert.h | 3 +- Eigen/src/IterativeLinearSolvers/BiCGSTAB.h | 4 +- .../ConjugateGradient.h | 21 ++- .../IterativeSolverBase.h | 162 +++++++++++++++--- .../LeastSquareConjugateGradient.h | 4 +- unsupported/Eigen/IterativeSolvers | 1 + .../Eigen/src/IterativeSolvers/DGMRES.h | 16 +- .../Eigen/src/IterativeSolvers/GMRES.h | 4 +- .../Eigen/src/IterativeSolvers/MINRES.h | 24 ++- 10 files changed, 189 insertions(+), 51 deletions(-) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 15b80abd9..3dee2bd7c 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -237,7 +237,6 @@ protected: EIGEN_DEVICE_FUNC ~noncopyable() {} }; - /** \internal * Convenient struct to get the result type of a unary or binary functor. 
* diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index f35ddb372..108181419 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -95,7 +95,8 @@ IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, STORAGE_LAYOUT_DOES_NOT_MATCH, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, - THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY }; }; diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index 4be00da47..191202138 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -156,7 +156,7 @@ template< typename _MatrixType, typename _Preconditioner> class BiCGSTAB : public IterativeSolverBase > { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -198,7 +198,7 @@ public: m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - if(!internal::bicgstab(mp_matrix, b.col(j), xj, Base::m_preconditioner, m_iterations, m_error)) + if(!internal::bicgstab(matrix(), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error)) failed = true; } m_info = failed ? NumericalIssue diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h index dbedf28fd..395daa8e4 100644 --- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h @@ -149,13 +149,15 @@ struct traits > * By default the iterations start with x=0 as an initial guess of the solution. * One can control the start using the solveWithGuess() method. 
* + * ConjugateGradient can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * * \sa class LeastSquaresConjugateGradient, class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ template< typename _MatrixType, int _UpLo, typename _Preconditioner> class ConjugateGradient : public IterativeSolverBase > { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -194,12 +196,19 @@ public: template void _solve_with_guess_impl(const Rhs& b, Dest& x) const { - typedef Ref MatRef; - typedef typename internal::conditional::IsComplex), - Transpose, MatRef const&>::type RowMajorWrapper; + typedef typename Base::MatrixWrapper MatrixWrapper; + typedef typename Base::ActualMatrixType ActualMatrixType; + enum { + TransposeInput = (!MatrixWrapper::MatrixFree) + && (UpLo==(Lower|Upper)) + && (!MatrixType::IsRowMajor) + && (!NumTraits::IsComplex) + }; + typedef typename internal::conditional, ActualMatrixType const&>::type RowMajorWrapper; + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(MatrixWrapper::MatrixFree,UpLo==(Lower|Upper)),MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY); typedef typename internal::conditional::Type + typename MatrixWrapper::template ConstSelfAdjointViewReturnType::Type >::type SelfAdjointWrapper; m_iterations = Base::maxIterations(); m_error = Base::m_tolerance; @@ -210,7 +219,7 @@ public: m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - RowMajorWrapper row_mat(mp_matrix); + RowMajorWrapper row_mat(matrix()); internal::conjugate_gradient(SelfAdjointWrapper(row_mat), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error); } diff --git a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h index e51ff7280..3d62fef6e 100644 --- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +++ 
b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h @@ -12,6 +12,128 @@ namespace Eigen { +namespace internal { + +template +struct is_ref_compatible_impl +{ +private: + template + struct any_conversion + { + template any_conversion(const volatile T&); + template any_conversion(T&); + }; + struct yes {int a[1];}; + struct no {int a[2];}; + + template + static yes test(const Ref&, int); + template + static no test(any_conversion, ...); + +public: + static MatrixType ms_from; + enum { value = sizeof(test(ms_from, 0))==sizeof(yes) }; +}; + +template +struct is_ref_compatible +{ + enum { value = is_ref_compatible_impl::type>::value }; +}; + +template::value> +class generic_matrix_wrapper; + +// We have an explicit matrix at hand, compatible with Ref<> +template +class generic_matrix_wrapper +{ +public: + typedef Ref ActualMatrixType; + template struct ConstSelfAdjointViewReturnType { + typedef typename ActualMatrixType::template ConstSelfAdjointViewReturnType::Type Type; + }; + + enum { + MatrixFree = false + }; + + generic_matrix_wrapper() + : m_dummy(0,0), m_matrix(m_dummy) + {} + + template + generic_matrix_wrapper(const InputType &mat) + : m_matrix(mat) + {} + + const ActualMatrixType& matrix() const + { + return m_matrix; + } + + template + void grab(const EigenBase &mat) + { + m_matrix.~Ref(); + ::new (&m_matrix) Ref(mat.derived()); + } + + void grab(const Ref &mat) + { + if(&(mat.derived()) != &m_matrix) + { + m_matrix.~Ref(); + ::new (&m_matrix) Ref(mat); + } + } + +protected: + MatrixType m_dummy; // used to default initialize the Ref<> object + ActualMatrixType m_matrix; +}; + +// MatrixType is not compatible with Ref<> -> matrix-free wrapper +template +class generic_matrix_wrapper +{ +public: + typedef MatrixType ActualMatrixType; + template struct ConstSelfAdjointViewReturnType + { + typedef ActualMatrixType Type; + }; + + enum { + MatrixFree = true + }; + + generic_matrix_wrapper() + : mp_matrix(0) + {} + + generic_matrix_wrapper(const MatrixType 
&mat) + : mp_matrix(&mat) + {} + + const ActualMatrixType& matrix() const + { + return *mp_matrix; + } + + void grab(const MatrixType &mat) + { + mp_matrix = &mat; + } + +protected: + const ActualMatrixType *mp_matrix; +}; + +} + /** \ingroup IterativeLinearSolvers_Module * \brief Base class for linear iterative solvers * @@ -42,7 +164,6 @@ public: /** Default constructor. */ IterativeSolverBase() - : m_dummy(0,0), mp_matrix(m_dummy) { init(); } @@ -59,10 +180,10 @@ public: */ template explicit IterativeSolverBase(const EigenBase& A) - : mp_matrix(A.derived()) + : m_matrixWrapper(A.derived()) { init(); - compute(mp_matrix); + compute(matrix()); } ~IterativeSolverBase() {} @@ -76,7 +197,7 @@ public: Derived& analyzePattern(const EigenBase& A) { grab(A.derived()); - m_preconditioner.analyzePattern(mp_matrix); + m_preconditioner.analyzePattern(matrix()); m_isInitialized = true; m_analysisIsOk = true; m_info = m_preconditioner.info(); @@ -97,7 +218,7 @@ public: { eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); grab(A.derived()); - m_preconditioner.factorize(mp_matrix); + m_preconditioner.factorize(matrix()); m_factorizationIsOk = true; m_info = m_preconditioner.info(); return derived(); @@ -117,7 +238,7 @@ public: Derived& compute(const EigenBase& A) { grab(A.derived()); - m_preconditioner.compute(mp_matrix); + m_preconditioner.compute(matrix()); m_isInitialized = true; m_analysisIsOk = true; m_factorizationIsOk = true; @@ -126,10 +247,10 @@ public: } /** \internal */ - Index rows() const { return mp_matrix.rows(); } + Index rows() const { return matrix().rows(); } /** \internal */ - Index cols() const { return mp_matrix.cols(); } + Index cols() const { return matrix().cols(); } /** \returns the tolerance threshold used by the stopping criteria. * \sa setTolerance() @@ -159,7 +280,7 @@ public: */ Index maxIterations() const { - return (m_maxIterations<0) ? 2*mp_matrix.cols() : m_maxIterations; + return (m_maxIterations<0) ? 
2*matrix().cols() : m_maxIterations; } /** Sets the max number of iterations. @@ -239,25 +360,22 @@ protected: m_maxIterations = -1; m_tolerance = NumTraits::epsilon(); } - - template - void grab(const EigenBase &A) + + typedef internal::generic_matrix_wrapper MatrixWrapper; + typedef typename MatrixWrapper::ActualMatrixType ActualMatrixType; + + const ActualMatrixType& matrix() const { - mp_matrix.~Ref(); - ::new (&mp_matrix) Ref(A.derived()); + return m_matrixWrapper.matrix(); } - void grab(const Ref &A) + template + void grab(const InputType &A) { - if(&(A.derived()) != &mp_matrix) - { - mp_matrix.~Ref(); - ::new (&mp_matrix) Ref(A); - } + m_matrixWrapper.grab(A); } - MatrixType m_dummy; - Ref mp_matrix; + MatrixWrapper m_matrixWrapper; Preconditioner m_preconditioner; Index m_maxIterations; diff --git a/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h index 1593c57b5..0aea0e099 100644 --- a/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h @@ -149,7 +149,7 @@ template< typename _MatrixType, typename _Preconditioner> class LeastSquaresConjugateGradient : public IterativeSolverBase > { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -193,7 +193,7 @@ public: m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - internal::least_square_conjugate_gradient(mp_matrix, b.col(j), xj, Base::m_preconditioner, m_iterations, m_error); + internal::least_square_conjugate_gradient(matrix(), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error); } m_isInitialized = true; diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers index f0c017f00..31e880bdc 100644 --- a/unsupported/Eigen/IterativeSolvers +++ b/unsupported/Eigen/IterativeSolvers @@ -33,6 +33,7 @@ #include 
"../../Eigen/Jacobi" #include "../../Eigen/Householder" #include "src/IterativeSolvers/GMRES.h" +#include "src/IterativeSolvers/DGMRES.h" //#include "src/IterativeSolvers/SSORPreconditioner.h" #include "src/IterativeSolvers/MINRES.h" diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h index ab82e782d..8a28fc16f 100644 --- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h @@ -40,7 +40,6 @@ void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType:: { eigen_assert(vec.size() == perm.size()); typedef typename IndexType::Scalar Index; - typedef typename VectorType::Scalar Scalar; bool flag; for (Index k = 0; k < ncut; k++) { @@ -101,7 +100,7 @@ template< typename _MatrixType, typename _Preconditioner> class DGMRES : public IterativeSolverBase > { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -112,6 +111,7 @@ class DGMRES : public IterativeSolverBase > typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Index Index; + typedef typename MatrixType::StorageIndex StorageIndex; typedef typename MatrixType::RealScalar RealScalar; typedef _Preconditioner Preconditioner; typedef Matrix DenseMatrix; @@ -150,7 +150,7 @@ class DGMRES : public IterativeSolverBase > m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - dgmres(mp_matrix, b.col(j), xj, Base::m_preconditioner); + dgmres(matrix(), b.col(j), xj, Base::m_preconditioner); } m_info = failed ? NumericalIssue : m_error <= Base::m_tolerance ? 
Success @@ -202,7 +202,7 @@ class DGMRES : public IterativeSolverBase > template int dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, int& nbIts) const; // Compute data to use for deflation - int dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, Index& neig) const; + int dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const; // Apply deflation to a vector template int dgmresApplyDeflation(const RhsType& In, DestType& Out) const; @@ -218,7 +218,7 @@ class DGMRES : public IterativeSolverBase > mutable DenseMatrix m_MU; // matrix operator applied to m_U (for next cycles) mutable DenseMatrix m_T; /* T=U^T*M^{-1}*A*U */ mutable PartialPivLU m_luT; // LU factorization of m_T - mutable int m_neig; //Number of eigenvalues to extract at each restart + mutable StorageIndex m_neig; //Number of eigenvalues to extract at each restart mutable int m_r; // Current number of deflated eigenvalues, size of m_U mutable int m_maxNeig; // Maximum number of eigenvalues to deflate mutable RealScalar m_lambdaN; //Modulus of the largest eigenvalue of A @@ -338,7 +338,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con beta = std::abs(g(it+1)); m_error = beta/normRhs; - std::cerr << nbIts << " Relative Residual Norm " << m_error << std::endl; + // std::cerr << nbIts << " Relative Residual Norm " << m_error << std::endl; it++; nbIts++; if (m_error < m_tolerance) @@ -416,7 +416,7 @@ inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_Matr } template< typename _MatrixType, typename _Preconditioner> -int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, Index& neig) const +int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const 
MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const { // First, find the Schur form of the Hessenberg matrix H typename internal::conditional::IsComplex, ComplexSchur, RealSchur >::type schurofH; @@ -426,7 +426,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri schurofH.computeFromHessenberg(m_Hes.topLeftCorner(it,it), matrixQ, computeU); ComplexVector eig(it); - Matrixperm(it); + Matrixperm(it); eig = this->schurValues(schurofH); // Reorder the absolute values of Schur values diff --git a/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/unsupported/Eigen/src/IterativeSolvers/GMRES.h index 2cfa60140..23bc07d61 100644 --- a/unsupported/Eigen/src/IterativeSolvers/GMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/GMRES.h @@ -257,7 +257,7 @@ template< typename _MatrixType, typename _Preconditioner> class GMRES : public IterativeSolverBase > { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -313,7 +313,7 @@ public: m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - if(!internal::gmres(mp_matrix, b.col(j), xj, Base::m_preconditioner, m_iterations, m_restart, m_error)) + if(!internal::gmres(matrix(), b.col(j), xj, Base::m_preconditioner, m_iterations, m_restart, m_error)) failed = true; } m_info = failed ? 
NumericalIssue diff --git a/unsupported/Eigen/src/IterativeSolvers/MINRES.h b/unsupported/Eigen/src/IterativeSolvers/MINRES.h index 84e491fa1..839025591 100644 --- a/unsupported/Eigen/src/IterativeSolvers/MINRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/MINRES.h @@ -198,7 +198,7 @@ namespace Eigen { { typedef IterativeSolverBase Base; - using Base::mp_matrix; + using Base::matrix; using Base::m_error; using Base::m_iterations; using Base::m_info; @@ -237,21 +237,31 @@ namespace Eigen { template void _solve_with_guess_impl(const Rhs& b, Dest& x) const { + typedef typename Base::MatrixWrapper MatrixWrapper; + typedef typename Base::ActualMatrixType ActualMatrixType; + enum { + TransposeInput = (!MatrixWrapper::MatrixFree) + && (UpLo==(Lower|Upper)) + && (!MatrixType::IsRowMajor) + && (!NumTraits::IsComplex) + }; + typedef typename internal::conditional, ActualMatrixType const&>::type RowMajorWrapper; + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(MatrixWrapper::MatrixFree,UpLo==(Lower|Upper)),MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY); typedef typename internal::conditional&, - SparseSelfAdjointView, UpLo> - >::type MatrixWrapperType; - + RowMajorWrapper, + typename MatrixWrapper::template ConstSelfAdjointViewReturnType::Type + >::type SelfAdjointWrapper; + m_iterations = Base::maxIterations(); m_error = Base::m_tolerance; - + RowMajorWrapper row_mat(matrix()); for(int j=0; j Date: Mon, 7 Dec 2015 12:33:38 +0100 Subject: [PATCH 293/344] Add matrix-free solver example --- Eigen/src/IterativeLinearSolvers/BiCGSTAB.h | 2 + doc/Manual.dox | 2 + doc/MatrixfreeSolverExample.dox | 20 +++ doc/SparseLinearSystems.dox | 4 +- doc/examples/matrixfree_cg.cpp | 128 ++++++++++++++++++ .../Eigen/src/IterativeSolvers/DGMRES.h | 2 + .../Eigen/src/IterativeSolvers/GMRES.h | 2 + .../Eigen/src/IterativeSolvers/MINRES.h | 2 + 8 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 doc/MatrixfreeSolverExample.dox create mode 100644 
doc/examples/matrixfree_cg.cpp diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index 191202138..454f46814 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -150,6 +150,8 @@ struct traits > * By default the iterations start with x=0 as an initial guess of the solution. * One can control the start using the solveWithGuess() method. * + * BiCGSTAB can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ template< typename _MatrixType, typename _Preconditioner> diff --git a/doc/Manual.dox b/doc/Manual.dox index 7f04edff4..c10c490a7 100644 --- a/doc/Manual.dox +++ b/doc/Manual.dox @@ -125,6 +125,8 @@ namespace Eigen { \ingroup Sparse_chapter */ /** \addtogroup TopicSparseSystems \ingroup Sparse_chapter */ +/** \addtogroup MatrixfreeSolverExample + \ingroup Sparse_chapter */ /** \addtogroup Sparse_Reference \ingroup Sparse_chapter */ diff --git a/doc/MatrixfreeSolverExample.dox b/doc/MatrixfreeSolverExample.dox new file mode 100644 index 000000000..000cb0bbe --- /dev/null +++ b/doc/MatrixfreeSolverExample.dox @@ -0,0 +1,20 @@ + +namespace Eigen { + +/** + +\eigenManualPage MatrixfreeSolverExample Matrix-free solvers + +Iterative solvers such as ConjugateGradient and BiCGSTAB can be used in a matrix free context. To this end, user must provide a wrapper class inheriting EigenBase<> and implementing the following methods: + - Index rows() and Index cols(): returns number of rows and columns respectively + - operator* with and %Eigen dense column vector (its actual implementation goes in a specialization of the internal::generic_product_impl class) + +Eigen::internal::traits<> must also be specialized for the wrapper type. 
+ +Here is a complete example wrapping a Eigen::SparseMatrix: +\include matrixfree_cg.cpp +Output: \verbinclude matrixfree_cg.out + +*/ + +} \ No newline at end of file diff --git a/doc/SparseLinearSystems.dox b/doc/SparseLinearSystems.dox index ba6a12035..9fb3282e7 100644 --- a/doc/SparseLinearSystems.dox +++ b/doc/SparseLinearSystems.dox @@ -133,9 +133,11 @@ x2 = solver.solve(b2); \endcode The compute() method is equivalent to calling both analyzePattern() and factorize(). -Finally, each solver provides some specific features, such as determinant, access to the factors, controls of the iterations, and so on. +Each solver provides some specific features, such as determinant, access to the factors, controls of the iterations, and so on. More details are available in the documentations of the respective classes. +Finally, most of the iterative solvers, can also be used in a \b matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + \section TheSparseCompute The Compute Step In the compute() function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices, LU for non hermitian matrices and QR for rectangular matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into analyzePattern() and factorize(). 
diff --git a/doc/examples/matrixfree_cg.cpp b/doc/examples/matrixfree_cg.cpp new file mode 100644 index 000000000..6a205aea3 --- /dev/null +++ b/doc/examples/matrixfree_cg.cpp @@ -0,0 +1,128 @@ +#include +#include +#include +#include +#include + +class MatrixReplacement; +using Eigen::SparseMatrix; + +namespace Eigen { +namespace internal { + // MatrixReplacement looks-like a SparseMatrix, so let's inherits its traits: + template<> + struct traits : public Eigen::internal::traits > + {}; +} +} + +// Example of a matrix-free wrapper from a user type to Eigen's compatible type +// For the sake of simplicity, this example simply wrap a Eigen::SparseMatrix. +class MatrixReplacement : public Eigen::EigenBase { +public: + // Required typedefs, constants, and method: + typedef double Scalar; + typedef double RealScalar; + typedef int StorageIndex; + enum { + ColsAtCompileTime = Eigen::Dynamic, + MaxColsAtCompileTime = Eigen::Dynamic, + IsRowMajor = false + }; + + Index rows() const { return mp_mat->rows(); } + Index cols() const { return mp_mat->cols(); } + + template + Eigen::Product operator*(const Eigen::MatrixBase& x) const { + return Eigen::Product(*this, x.derived()); + } + + // Custom API: + MatrixReplacement() : mp_mat(0) {} + + void attachMyMatrix(const SparseMatrix &mat) { + mp_mat = &mat; + } + const SparseMatrix my_matrix() const { return *mp_mat; } + +private: + const SparseMatrix *mp_mat; +}; + + +// Implementation of MatrixReplacement * Eigen::DenseVector though a specialization of internal::generic_product_impl: +namespace Eigen { +namespace internal { + + template + struct generic_product_impl // GEMV stands for matrix-vector + : generic_product_impl_base > + { + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const MatrixReplacement& lhs, const Rhs& rhs, const Scalar& alpha) + { + // This method should implement "dst += alpha * lhs * rhs" inplace, + // however, for iterative solvers, alpha is always equal to 
1, so let's not bother about it. + assert(alpha==Scalar(1) && "scaling is not implemented"); + + // Here we could simply call dst.noalias() += lhs.my_matrix() * rhs, + // but let's do something fancier (and less efficient): + for(Index i=0; i S = Eigen::MatrixXd::Random(n,n).sparseView(0.5,1); + S = S.transpose()*S; + + MatrixReplacement A; + A.attachMyMatrix(S); + + Eigen::VectorXd b(n), x; + b.setRandom(); + + // Solve Ax = b using various iterative solver with matrix-free version: + { + Eigen::ConjugateGradient cg; + cg.compute(A); + x = cg.solve(b); + std::cout << "CG: #iterations: " << cg.iterations() << ", estimated error: " << cg.error() << std::endl; + } + + { + Eigen::BiCGSTAB bicg; + bicg.compute(A); + x = bicg.solve(b); + std::cout << "BiCGSTAB: #iterations: " << bicg.iterations() << ", estimated error: " << bicg.error() << std::endl; + } + + { + Eigen::GMRES gmres; + gmres.compute(A); + x = gmres.solve(b); + std::cout << "GMRES: #iterations: " << gmres.iterations() << ", estimated error: " << gmres.error() << std::endl; + } + + { + Eigen::DGMRES gmres; + gmres.compute(A); + x = gmres.solve(b); + std::cout << "DGMRES: #iterations: " << gmres.iterations() << ", estimated error: " << gmres.error() << std::endl; + } + + { + Eigen::MINRES minres; + minres.compute(A); + x = minres.solve(b); + std::cout << "MINRES: #iterations: " << minres.iterations() << ", estimated error: " << minres.error() << std::endl; + } +} diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h index 8a28fc16f..bae04fc30 100644 --- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h @@ -83,6 +83,8 @@ void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType:: * x = solver.solve(b); * \endcode * + * DGMRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * * References : * [1] D. 
NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid * Algebraic Solvers for Linear Systems Arising from Compressible diff --git a/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/unsupported/Eigen/src/IterativeSolvers/GMRES.h index 23bc07d61..fbe21fc7e 100644 --- a/unsupported/Eigen/src/IterativeSolvers/GMRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/GMRES.h @@ -251,6 +251,8 @@ struct traits > * By default the iterations start with x=0 as an initial guess of the solution. * One can control the start using the solveWithGuess() method. * + * GMRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. + * * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ template< typename _MatrixType, typename _Preconditioner> diff --git a/unsupported/Eigen/src/IterativeSolvers/MINRES.h b/unsupported/Eigen/src/IterativeSolvers/MINRES.h index 839025591..256990c1a 100644 --- a/unsupported/Eigen/src/IterativeSolvers/MINRES.h +++ b/unsupported/Eigen/src/IterativeSolvers/MINRES.h @@ -191,6 +191,8 @@ namespace Eigen { * By default the iterations start with x=0 as an initial guess of the solution. * One can control the start using the solveWithGuess() method. * + * MINRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. 
+ * * \sa class ConjugateGradient, BiCGSTAB, SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner */ template< typename _MatrixType, int _UpLo, typename _Preconditioner> From 7dfe75f445835baff18bbe82ba7253f7563cbdc6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 7 Dec 2015 08:12:30 -0800 Subject: [PATCH 294/344] Fixed compilation warnings --- unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 9e208934b..f49e2b260 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -531,7 +531,7 @@ struct TensorContractionEvaluatorBase eval_right_dims[i] = m_rightImpl.dimensions()[i]; } // We keep the pairs of contracting indices. - for (unsigned int i = 0; i < ContractDims; i++) { + for (int i = 0; i < ContractDims; i++) { eval_op_indices[i].first = op.indices()[i].first; eval_op_indices[i].second = op.indices()[i].second; } @@ -545,7 +545,7 @@ struct TensorContractionEvaluatorBase } // We need to flip all the pairs of contracting indices as well as // reversing the dimensions. 
- for (unsigned int i = 0; i < ContractDims; i++) { + for (int i = 0; i < ContractDims; i++) { eval_op_indices[i].first = LDims - 1 - op.indices()[i].second; eval_op_indices[i].second = RDims - 1 - op.indices()[i].first; } @@ -584,7 +584,7 @@ struct TensorContractionEvaluatorBase for (int i = 0; i < LDims; i++) { // find if we are contracting on index i of left tensor bool contracting = false; - for (unsigned int j = 0; j < ContractDims; j++) { + for (int j = 0; j < ContractDims; j++) { if (eval_op_indices[j].first == i) { contracting = true; break; @@ -612,7 +612,7 @@ struct TensorContractionEvaluatorBase for (int i = 0; i < RDims; i++) { bool contracting = false; // find if we are contracting on index i of right tensor - for (unsigned int j = 0; j < ContractDims; j++) { + for (int j = 0; j < ContractDims; j++) { if (eval_op_indices[j].second == i) { contracting = true; break; @@ -639,7 +639,7 @@ struct TensorContractionEvaluatorBase // each tensor, we'll only look at the first tensor here. m_rhs_inner_dim_contiguous = true; m_rhs_inner_dim_reordered = false; - for (unsigned int i = 0; i < ContractDims; i++) { + for (int i = 0; i < ContractDims; i++) { Index left = eval_op_indices[i].first; Index right = eval_op_indices[i].second; From fa4f933c0fe65eda6a051f978db12210f11f5cdb Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Mon, 7 Dec 2015 15:24:49 -0800 Subject: [PATCH 295/344] Add special functions to Eigen: lgamma, erf, erfc. Includes CUDA support and unit tests. 
--- Eigen/Core | 1 + Eigen/src/Core/GenericPacketMath.h | 15 ++ Eigen/src/Core/GlobalFunctions.h | 3 + Eigen/src/Core/SpecialFunctions.h | 144 ++++++++++++++++++ Eigen/src/Core/arch/CUDA/MathFunctions.h | 37 +++++ Eigen/src/Core/arch/CUDA/PacketMath.h | 6 + Eigen/src/Core/functors/UnaryFunctors.h | 72 +++++++++ Eigen/src/Core/util/ForwardDeclarations.h | 6 + Eigen/src/Core/util/StaticAssert.h | 3 +- Eigen/src/plugins/ArrayCwiseUnaryOps.h | 44 ++++++ test/array.cpp | 3 + test/packetmath.cpp | 23 +++ .../Eigen/CXX11/src/Tensor/TensorBase.h | 18 +++ unsupported/test/cxx11_tensor_cuda.cpp | 139 +++++++++++++++++ 14 files changed, 513 insertions(+), 1 deletion(-) create mode 100644 Eigen/src/Core/SpecialFunctions.h diff --git a/Eigen/Core b/Eigen/Core index 1ec749452..63602f4c3 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -300,6 +300,7 @@ using std::ptrdiff_t; #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" +#include "src/Core/SpecialFunctions.h" #include "src/Core/GenericPacketMath.h" #if defined EIGEN_VECTORIZE_AVX diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 5f27d8166..0e7dd29ed 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -74,6 +74,9 @@ struct default_packet_traits HasSinh = 0, HasCosh = 0, HasTanh = 0, + HasLGamma = 0, + HasErf = 0, + HasErfc = 0 HasRound = 0, HasFloor = 0, @@ -432,6 +435,18 @@ Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } +/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plgamma(const Packet& a) { return numext::lgamma(a); } + +/** \internal \returns the erf(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perf(const Packet& a) { return numext::erf(a); } + 
+/** \internal \returns the erfc(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perfc(const Packet& a) { return numext::erfc(a); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 585974809..62fec7008 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -49,6 +49,9 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h new file mode 100644 index 000000000..d481f2e06 --- /dev/null +++ b/Eigen/src/Core/SpecialFunctions.h @@ -0,0 +1,144 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// Copyright (C) 2015 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SPECIAL_FUNCTIONS_H +#define EIGEN_SPECIAL_FUNCTIONS_H + +namespace Eigen { + +namespace internal { + +template +EIGEN_STRONG_INLINE Scalar __lgamma(Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __lgamma(float x) { return lgammaf(x); } +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __lgamma(double x) { return lgamma(x); } + +template +EIGEN_STRONG_INLINE Scalar __erf(Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __erf(float x) { return erff(x); } +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __erf(double x) { return erf(x); } + +template +EIGEN_STRONG_INLINE Scalar __erfc(Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __erfc(float x) { return erfcf(x); } +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __erfc(double x) { return erfc(x); } + +} // end namespace internal + +/**************************************************************************** + * Implementations * + ****************************************************************************/ + +namespace internal { + +/**************************************************************************** + * Implementation of + * lgamma * + ****************************************************************************/ + +template +struct lgamma_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + { + return __lgamma(x); + } +}; + +template +struct lgamma_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** + * Implementation of + * erf * + ****************************************************************************/ + +template 
+struct erf_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + { + return __erf(x); + } +}; + +template +struct erf_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of erfc * +****************************************************************************/ + +template +struct erfc_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + { + return __erfc(x); + } +}; + +template +struct erfc_retval +{ + typedef Scalar type; +}; + +} // end namespace internal + +namespace numext { + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) lgamma(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); +} + +} // end namespace numext + +} // end namespace Eigen + +#endif // EIGEN_SPECIAL_FUNCTIONS_H diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 3bea88bea..ecd5c444e 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -66,6 +66,43 @@ double2 prsqrt(const double2& a) return make_double2(rsqrt(a.x), rsqrt(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 plgamma(const float4& a) +{ + return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 plgamma(const double2& a) +{ + return make_double2(lgamma(a.x), lgamma(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perf(const float4& a) +{ + return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w)); +} + +template<> 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perf(const double2& a) +{ + return make_double2(erf(a.x), erf(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 perfc(const float4& a) +{ + return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 perfc(const double2& a) +{ + return make_double2(erfc(a.x), erfc(a.y)); +} + + #endif } // end namespace internal diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 0d2c2fef0..cb1b547e0 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -39,6 +39,9 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasLGamma = 1, + HasErf = 1, + HasErfc = 1, HasBlend = 0, }; @@ -59,6 +62,9 @@ template<> struct packet_traits : default_packet_traits HasExp = 1, HasSqrt = 1, HasRsqrt = 1, + HasLGamma = 1, + HasErf = 1, + HasErfc = 1, HasBlend = 0, }; diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index e6c665fb6..e16bdd589 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -403,6 +403,77 @@ struct functor_traits > }; }; + +/** \internal + * \brief Template functor to compute the natural log of the absolute + * value of Gamma of a scalar + * \sa class CwiseUnaryOp, Cwise::lgamma() + */ +template struct scalar_lgamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::lgamma; return lgamma(a); + } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasLGamma + }; +}; + +/** \internal + * \brief Template 
functor to compute the Gauss error function of a + * scalar + * \sa class CwiseUnaryOp, Cwise::erf() + */ +template struct scalar_erf_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::erf; return erf(a); + } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::perf(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasErf + }; +}; + +/** \internal + * \brief Template functor to compute the Complementary Error Function + * of a scalar + * \sa class CwiseUnaryOp, Cwise::erfc() + */ +template struct scalar_erfc_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::erfc; return erfc(a); + } + typedef typename packet_traits::type Packet; + inline Packet packetOp(const Packet& a) const { return internal::perfc(a); } +}; +template +struct functor_traits > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, + PacketAccess = packet_traits::HasErfc + }; +}; + + /** \internal * \brief Template functor to compute the atan of a scalar * \sa class CwiseUnaryOp, ArrayBase::atan() @@ -422,6 +493,7 @@ struct functor_traits > }; }; + /** \internal * \brief Template functor to compute the tanh of a scalar * \sa class CwiseUnaryOp, ArrayBase::tanh() diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 483af876f..27c7907fc 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -294,6 +294,12 @@ struct stem_function }; } +// SpecialFunctions forward declarations +namespace internal { +template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __lgamma(Scalar x); +template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC 
Scalar __erf(Scalar x); +template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __erfc(Scalar x); + } // end namespace Eigen #endif // EIGEN_FORWARDDECLARATIONS_H diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 108181419..1fe365aa7 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -96,7 +96,8 @@ STORAGE_LAYOUT_DOES_NOT_MATCH, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, - MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, + THIS_TYPE_IS_NOT_SUPPORTED }; }; diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 45e826b0c..ed9818dd1 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -21,6 +21,9 @@ typedef CwiseUnaryOp, const Derived> AtanReturn typedef CwiseUnaryOp, const Derived> TanhReturnType; typedef CwiseUnaryOp, const Derived> SinhReturnType; typedef CwiseUnaryOp, const Derived> CoshReturnType; +typedef CwiseUnaryOp, const Derived> LgammaReturnType; +typedef CwiseUnaryOp, const Derived> ErfReturnType; +typedef CwiseUnaryOp, const Derived> ErfcReturnType; typedef CwiseUnaryOp, const Derived> PowReturnType; typedef CwiseUnaryOp, const Derived> SquareReturnType; typedef CwiseUnaryOp, const Derived> CubeReturnType; @@ -302,6 +305,47 @@ cosh() const return CoshReturnType(derived()); } +/** \returns an expression of the coefficient-wise ln(|gamma(*this)|). + * + * Example: \include Cwise_lgamma.cpp + * Output: \verbinclude Cwise_lgamma.out + * + * \sa cos(), sin(), tan() + */ +inline const CwiseUnaryOp, Derived> +lgamma() const +{ + return LgammaReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise Gauss error + * function of *this. 
+ * + * Example: \include Cwise_erf.cpp + * Output: \verbinclude Cwise_erf.out + * + * \sa cos(), sin(), tan() + */ +inline const CwiseUnaryOp, Derived> +erf() const +{ + return ErfReturnType(derived()); +} + +/** \returns an expression of the coefficient-wise Complementary error + * function of *this. + * + * Example: \include Cwise_erfc.cpp + * Output: \verbinclude Cwise_erfc.out + * + * \sa cos(), sin(), tan() + */ +inline const CwiseUnaryOp, Derived> +erfc() const +{ + return ErfcReturnType(derived()); +} + /** \returns an expression of the coefficient-wise power of *this to the given exponent. * * This function computes the coefficient-wise power. The function MatrixBase::pow() in the diff --git a/test/array.cpp b/test/array.cpp index 5395721f5..9994c23c3 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -217,6 +217,9 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); + VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1)); + VERIFY_IS_APPROX(m1.erf(), erf(m1)); + VERIFY_IS_APPROX(m1.erfc(), erfc(m1)); VERIFY_IS_APPROX(m1.arg(), arg(m1)); VERIFY_IS_APPROX(m1.round(), round(m1)); VERIFY_IS_APPROX(m1.floor(), floor(m1)); diff --git a/test/packetmath.cpp b/test/packetmath.cpp index b6616ac5e..304fab5de 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -351,6 +351,25 @@ template void packetmath_real() VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::denorm_min()), data2[1]); } + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasLGamma,Packet> h; + h.store(data2, internal::plgamma(h.load(data1))); + VERIFY(std::isnan(data2[0])); + } + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasErf,Packet> h; + h.store(data2, internal::perf(h.load(data1))); + VERIFY(std::isnan(data2[0])); + } + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasErfc,Packet> h; + h.store(data2, internal::perfc(h.load(data1))); 
+ VERIFY(std::isnan(data2[0])); + } + for (int i=0; i(0,1) * std::pow(Scalar(10), internal::random(-6,6)); @@ -360,6 +379,10 @@ template void packetmath_real() data1[internal::random(0, PacketSize)] = 0; CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); + CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); + CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); + CHECK_CWISE1_IF(internal::packet_traits::HasErfc, std::erfc, internal::perfc); + if(PacketTraits::HasLog && PacketTraits::size>=2) { data1[0] = std::numeric_limits::quiet_NaN(); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index d1ce3d0ed..392acf302 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -122,6 +122,24 @@ class TensorBase return unaryExpr(internal::scalar_tanh_op()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + lgamma() const { + return unaryExpr(internal::scalar_lgamma_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erf() const { + return unaryExpr(internal::scalar_erf_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erfc() const { + return unaryExpr(internal::scalar_erfc_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> sigmoid() const { diff --git a/unsupported/test/cxx11_tensor_cuda.cpp b/unsupported/test/cxx11_tensor_cuda.cpp index 5ff082a3a..49e1894ab 100644 --- a/unsupported/test/cxx11_tensor_cuda.cpp +++ b/unsupported/test/cxx11_tensor_cuda.cpp @@ -507,6 +507,115 @@ static void test_cuda_convolution_3d() } } + +template +void test_cuda_lgamma(const Scalar stddev) +{ + Tensor in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor 
out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in(d_in, 72, 97); + Eigen::TensorMap > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.lgamma(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j))); + } + } +} + +template +void test_cuda_erf(const Scalar stddev) +{ + Tensor in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in(d_in, 72, 97); + Eigen::TensorMap > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.erf(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j))); + } + } +} + +template +void test_cuda_erfc(const Scalar stddev) +{ + Tensor in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + 
cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in(d_in, 72, 97); + Eigen::TensorMap > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.erfc(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j))); + } + } +} + void test_cxx11_tensor_cuda() { CALL_SUBTEST(test_cuda_elementwise_small()); @@ -522,4 +631,34 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST(test_cuda_convolution_2d()); CALL_SUBTEST(test_cuda_convolution_3d()); CALL_SUBTEST(test_cuda_convolution_3d()); + CALL_SUBTEST(test_cuda_lgamma(1.0f)); + CALL_SUBTEST(test_cuda_lgamma(100.0f)); + CALL_SUBTEST(test_cuda_lgamma(0.01f)); + CALL_SUBTEST(test_cuda_lgamma(0.001f)); + CALL_SUBTEST(test_cuda_erf(1.0f)); + CALL_SUBTEST(test_cuda_erf(100.0f)); + CALL_SUBTEST(test_cuda_erf(0.01f)); + CALL_SUBTEST(test_cuda_erf(0.001f)); + CALL_SUBTEST(test_cuda_erfc(1.0f)); + // CALL_SUBTEST(test_cuda_erfc(100.0f)); + CALL_SUBTEST(test_cuda_erfc(5.0f)); // CUDA erfc lacks precision for large inputs + CALL_SUBTEST(test_cuda_erfc(0.01f)); + CALL_SUBTEST(test_cuda_erfc(0.001f)); + CALL_SUBTEST(test_cuda_tanh(1.0)); + CALL_SUBTEST(test_cuda_tanh(100.0)); + CALL_SUBTEST(test_cuda_tanh(0.01)); + CALL_SUBTEST(test_cuda_tanh(0.001)); + CALL_SUBTEST(test_cuda_lgamma(1.0)); + CALL_SUBTEST(test_cuda_lgamma(100.0)); + CALL_SUBTEST(test_cuda_lgamma(0.01)); + CALL_SUBTEST(test_cuda_lgamma(0.001)); + CALL_SUBTEST(test_cuda_erf(1.0)); + CALL_SUBTEST(test_cuda_erf(100.0)); + CALL_SUBTEST(test_cuda_erf(0.01)); + CALL_SUBTEST(test_cuda_erf(0.001)); + CALL_SUBTEST(test_cuda_erfc(1.0)); + // CALL_SUBTEST(test_cuda_erfc(100.0)); + 
CALL_SUBTEST(test_cuda_erfc(5.0)); // CUDA erfc lacks precision for large inputs + CALL_SUBTEST(test_cuda_erfc(0.01)); + CALL_SUBTEST(test_cuda_erfc(0.001)); } From 73b68d4370f761d6422e02e7e515aefdcd652c1e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 7 Dec 2015 16:38:48 -0800 Subject: [PATCH 296/344] Fixed a couple of typos Cleaned up the code a bit. --- Eigen/src/Core/GenericPacketMath.h | 4 ++-- Eigen/src/Core/SpecialFunctions.h | 13 +++++-------- Eigen/src/Core/util/ForwardDeclarations.h | 6 ------ test/packetmath.cpp | 9 ++++++--- 4 files changed, 13 insertions(+), 19 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 0e7dd29ed..6872f5e53 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -43,7 +43,7 @@ struct default_packet_traits { enum { HasHalfPacket = 0, - + HasAdd = 1, HasSub = 1, HasMul = 1, @@ -76,7 +76,7 @@ struct default_packet_traits HasTanh = 0, HasLGamma = 0, HasErf = 0, - HasErfc = 0 + HasErfc = 0, HasRound = 0, HasFloor = 0, diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index d481f2e06..ae8f0105a 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -1,7 +1,6 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2006-2010 Benoit Jacob // Copyright (C) 2015 Eugene Brevdo // // This Source Code Form is subject to the terms of the Mozilla @@ -45,14 +44,13 @@ template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __erfc(double x } // end namespace internal /**************************************************************************** - * Implementations * + * Implementations * ****************************************************************************/ namespace internal { /**************************************************************************** - * Implementation of - * lgamma * + * Implementation of lgamma * ****************************************************************************/ template @@ -72,8 +70,7 @@ struct lgamma_retval }; /**************************************************************************** - * Implementation of - * erf * + * Implementation of erf * ****************************************************************************/ template @@ -92,8 +89,8 @@ struct erf_retval typedef Scalar type; }; -/**************************************************************************** -* Implementation of erfc * +/*************************************************************************** +* Implementation of erfc * ****************************************************************************/ template diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 27c7907fc..483af876f 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -294,12 +294,6 @@ struct stem_function }; } -// SpecialFunctions forward declarations -namespace internal { -template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __lgamma(Scalar x); -template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __erf(Scalar x); -template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar __erfc(Scalar x); - } // end namespace Eigen #endif // EIGEN_FORWARDDECLARATIONS_H diff --git 
a/test/packetmath.cpp b/test/packetmath.cpp index 304fab5de..c34b6f3f1 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -355,19 +355,19 @@ template void packetmath_real() data1[0] = std::numeric_limits::quiet_NaN(); packet_helper::HasLGamma,Packet> h; h.store(data2, internal::plgamma(h.load(data1))); - VERIFY(std::isnan(data2[0])); + VERIFY((numext::isnan)(data2[0])); } { data1[0] = std::numeric_limits::quiet_NaN(); packet_helper::HasErf,Packet> h; h.store(data2, internal::perf(h.load(data1))); - VERIFY(std::isnan(data2[0])); + VERIFY((numext::isnan)(data2[0])); } { data1[0] = std::numeric_limits::quiet_NaN(); packet_helper::HasErfc,Packet> h; h.store(data2, internal::perfc(h.load(data1))); - VERIFY(std::isnan(data2[0])); + VERIFY((numext::isnan)(data2[0])); } for (int i=0; i void packetmath_real() data1[i] = internal::random(0,1) * std::pow(Scalar(10), internal::random(-6,6)); data2[i] = internal::random(0,1) * std::pow(Scalar(10), internal::random(-6,6)); } + +#if __cplusplus > 199711L if(internal::random(0,1)<0.1) data1[internal::random(0, PacketSize)] = 0; CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); @@ -382,6 +384,7 @@ template void packetmath_real() CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); CHECK_CWISE1_IF(internal::packet_traits::HasErfc, std::erfc, internal::perfc); +#endif if(PacketTraits::HasLog && PacketTraits::size>=2) { From b1ae39794cee2536d28360acd2ea6291806debe1 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 7 Dec 2015 16:46:35 -0800 Subject: [PATCH 297/344] Simplified the code a bit --- Eigen/src/plugins/ArrayCwiseUnaryOps.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index ed9818dd1..01432e2f3 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ 
b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -312,7 +312,7 @@ cosh() const * * \sa cos(), sin(), tan() */ -inline const CwiseUnaryOp, Derived> +inline const LgammaReturnType lgamma() const { return LgammaReturnType(derived()); @@ -326,7 +326,7 @@ lgamma() const * * \sa cos(), sin(), tan() */ -inline const CwiseUnaryOp, Derived> +inline const ErfReturnType erf() const { return ErfReturnType(derived()); @@ -340,7 +340,7 @@ erf() const * * \sa cos(), sin(), tan() */ -inline const CwiseUnaryOp, Derived> +inline const ErfcReturnType erfc() const { return ErfcReturnType(derived()); From b630d10b62d4338181a49272d5dd57381964d3a2 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 7 Dec 2015 17:08:08 -0800 Subject: [PATCH 298/344] Only disable the erf, erfc, and lgamma tests for older versions of c++. --- test/packetmath.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/packetmath.cpp b/test/packetmath.cpp index c34b6f3f1..758c2fb9d 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -376,11 +376,11 @@ template void packetmath_real() data2[i] = internal::random(0,1) * std::pow(Scalar(10), internal::random(-6,6)); } -#if __cplusplus > 199711L if(internal::random(0,1)<0.1) data1[internal::random(0, PacketSize)] = 0; CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); +#if __cplusplus > 199711L CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); CHECK_CWISE1_IF(internal::packet_traits::HasErfc, std::erfc, internal::perfc); From e535450573bf8a15d63cc0dff6090a89f28cf8cb Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 8 Dec 2015 14:06:39 -0800 Subject: [PATCH 299/344] Cleanup --- Eigen/src/Core/GenericPacketMath.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h 
b/Eigen/src/Core/GenericPacketMath.h index 6872f5e53..8ad51bad5 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -437,15 +437,15 @@ Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } /** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet plgamma(const Packet& a) { return numext::lgamma(a); } +Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } /** \internal \returns the erf(\a a) (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet perf(const Packet& a) { return numext::erf(a); } +Packet perf(const Packet& a) { using numext::erf; return erf(a); } /** \internal \returns the erfc(\a a) (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet perfc(const Packet& a) { return numext::erfc(a); } +Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } /*************************************************************************** * The following functions might not have to be overwritten for vectorized types From 53b196aa5fb503ab3707887eea226eec56943380 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 8 Dec 2015 14:17:34 -0800 Subject: [PATCH 300/344] Simplified the implementation of lgamma, erf, and erfc --- Eigen/src/Core/SpecialFunctions.h | 87 +++++++++++++++++-------------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index ae8f0105a..f90f1b81b 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -11,42 +11,6 @@ #define EIGEN_SPECIAL_FUNCTIONS_H namespace Eigen { - -namespace internal { - -template -EIGEN_STRONG_INLINE Scalar __lgamma(Scalar x) { - EIGEN_STATIC_ASSERT((internal::is_same::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); -} - -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float 
__lgamma(float x) { return lgammaf(x); } -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __lgamma(double x) { return lgamma(x); } - -template -EIGEN_STRONG_INLINE Scalar __erf(Scalar x) { - EIGEN_STATIC_ASSERT((internal::is_same::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); -} - -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __erf(float x) { return erff(x); } -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __erf(double x) { return erf(x); } - -template -EIGEN_STRONG_INLINE Scalar __erfc(Scalar x) { - EIGEN_STATIC_ASSERT((internal::is_same::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); -} - -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float __erfc(float x) { return erfcf(x); } -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double __erfc(double x) { return erfc(x); } - -} // end namespace internal - -/**************************************************************************** - * Implementations * - ****************************************************************************/ - namespace internal { /**************************************************************************** @@ -59,10 +23,25 @@ struct lgamma_impl EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) { - return __lgamma(x); + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); } }; +template<> +struct lgamma_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const float& x) { return ::lgammaf(x); } +}; + +template<> +struct lgamma_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); } +}; + template struct lgamma_retval { @@ -79,10 +58,25 @@ struct erf_impl EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) { - return __erf(x); + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); } }; +template<> +struct erf_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float 
run(const float& x) { return ::erff(x); } +}; + +template<> +struct erf_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); } +}; + template struct erf_retval { @@ -99,10 +93,25 @@ struct erfc_impl EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) { - return __erfc(x); + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); } }; +template<> +struct erfc_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); } +}; + +template<> +struct erfc_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); } +}; + template struct erfc_retval { From b0a1d6f2e532afa475077f9d8b79b1cbe87b71ae Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 10 Dec 2015 15:47:06 +0100 Subject: [PATCH 301/344] Improve handling of deprecated EIGEN_INCLUDE_INSTALL_DIR variable --- CMakeLists.txt | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5240f3039..eaee5d5e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -338,10 +338,20 @@ option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tens include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) -set(INCLUDE_INSTALL_DIR - "${CMAKE_INSTALL_INCLUDEDIR}/eigen3" - CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed" - ) +# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR +if(EIGEN_INCLUDE_INSTALL_DIR) + message(WARNING "EIGEN_INCLUDE_INSTALL_DIR is deprecated. 
Use INCLUDE_INSTALL_DIR instead.") +endif() + +if(EIGEN_INCLUDE_INSTALL_DIR AND NOT INCLUDE_INSTALL_DIR) + set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} + CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed") +else() + set(INCLUDE_INSTALL_DIR + "${CMAKE_INSTALL_INCLUDEDIR}/eigen3" + CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed" + ) +endif() set(CMAKEPACKAGE_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/eigen3" CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed" @@ -351,11 +361,6 @@ set(PKGCONFIG_INSTALL_DIR CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where eigen3.pc is installed" ) -# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR -if(DEFINED EIGEN_INCLUDE_INSTALL_DIR) - message(WARNING "EIGEN_INCLUDE_INSTALL_DIR is deprecated. Use INCLUDE_INSTALL_DIR instead.") - set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} CACHE PATH "" FORCE) -endif() # similar to set_target_properties but append the property instead of overwriting it macro(ei_add_target_property target prop value) From 7ad1aaec1db2dbf1abfc19a5bb62626653fa48fb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 10 Dec 2015 16:06:33 +0100 Subject: [PATCH 302/344] bug #1103: fix neon vectorization of pmul(Packet1cd,Packet1cd) --- Eigen/src/Core/arch/NEON/Complex.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index d2322b307..d2d467936 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -73,7 +73,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, con // Get the real values of a | a1_re | a1_re | a2_re | a2_re | v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0)); - // Get the real values of a | a1_im | a1_im | a2_im | a2_im | + // Get the 
imag values of a | a1_im | a1_im | a2_im | a2_im | v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1)); // Multiply the real a with b v1 = vmulq_f32(v1, b.v); @@ -325,8 +325,8 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, con // Get the real values of a v1 = vdupq_lane_f64(vget_low_f64(a.v), 0); - // Get the real values of a - v2 = vdupq_lane_f64(vget_high_f64(a.v), 1); + // Get the imag values of a + v2 = vdupq_lane_f64(vget_high_f64(a.v), 0); // Multiply the real a with b v1 = vmulq_f64(v1, b.v); // Multiply the imag a with b From 46d2f6cd78aa752c55099072217993af81f70779 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 10 Dec 2015 21:33:43 +0100 Subject: [PATCH 303/344] Workaround gcc issue with -O3 and the i387 FPU. --- test/packetmath.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/packetmath.cpp b/test/packetmath.cpp index b6616ac5e..f1826f0ef 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -18,7 +18,9 @@ template T negate(const T& x) { return -x; } } } -template bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits::Real& refvalue) +// NOTE: we disbale inlining for this function to workaround a GCC issue when using -O3 and the i387 FPU. +template EIGEN_DONT_INLINE +bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits::Real& refvalue) { return internal::isMuchSmallerThan(a-b, refvalue); } From 48877a69334382b8478f5095c5e56500b7de7478 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 13:09:49 -0800 Subject: [PATCH 304/344] Only implement the lgamma, erf, and erfc functions when using a compiler compliant with the C99 specification. 
--- Eigen/src/Core/SpecialFunctions.h | 43 ++++++++++++++++++------------- test/packetmath.cpp | 4 ++- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index f90f1b81b..1de3d7f78 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -28,6 +28,13 @@ struct lgamma_impl } }; +template +struct lgamma_retval +{ + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH template<> struct lgamma_impl { @@ -41,12 +48,7 @@ struct lgamma_impl EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); } }; - -template -struct lgamma_retval -{ - typedef Scalar type; -}; +#endif /**************************************************************************** * Implementation of erf * @@ -63,6 +65,13 @@ struct erf_impl } }; +template +struct erf_retval +{ + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH template<> struct erf_impl { @@ -76,12 +85,7 @@ struct erf_impl EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); } }; - -template -struct erf_retval -{ - typedef Scalar type; -}; +#endif // EIGEN_HAS_C99_MATH /*************************************************************************** * Implementation of erfc * @@ -98,6 +102,13 @@ struct erfc_impl } }; +template +struct erfc_retval +{ + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH template<> struct erfc_impl { @@ -111,15 +122,11 @@ struct erfc_impl EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); } }; - -template -struct erfc_retval -{ - typedef Scalar type; -}; +#endif // EIGEN_HAS_C99_MATH } // end namespace internal + namespace numext { template diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 758c2fb9d..91bb998d0 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -351,6 +351,7 @@ template void packetmath_real() 
VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::denorm_min()), data2[1]); } +#ifdef EIGEN_HAS_C99_MATH { data1[0] = std::numeric_limits::quiet_NaN(); packet_helper::HasLGamma,Packet> h; @@ -369,6 +370,7 @@ template void packetmath_real() h.store(data2, internal::perfc(h.load(data1))); VERIFY((numext::isnan)(data2[0])); } +#endif // EIGEN_HAS_C99_MATH for (int i=0; i void packetmath_real() data1[internal::random(0, PacketSize)] = 0; CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); -#if __cplusplus > 199711L +#if defined(EIGEN_HAS_C99_MATH) && (__cplusplus > 199711L) CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); CHECK_CWISE1_IF(internal::packet_traits::HasErfc, std::erfc, internal::perfc); From 58e06447dec67e265fb0a749e60f67ecd831b32b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 13:11:36 -0800 Subject: [PATCH 305/344] Silence a compilation warning --- Eigen/src/Core/SpecialFunctions.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index 1de3d7f78..05973e372 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -25,6 +25,7 @@ struct lgamma_impl { EIGEN_STATIC_ASSERT((internal::is_same::value == false), THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); } }; @@ -62,6 +63,7 @@ struct erf_impl { EIGEN_STATIC_ASSERT((internal::is_same::value == false), THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); } }; @@ -99,6 +101,7 @@ struct erfc_impl { EIGEN_STATIC_ASSERT((internal::is_same::value == false), THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); } }; From 8314962ce2ce5e7cd8c591b7a0a7039abd83f5c6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 13:13:45 -0800 Subject: [PATCH 306/344] Only test the lgamma, erf and erfc function 
when using a C99 compliant compiler --- test/array.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/array.cpp b/test/array.cpp index 9994c23c3..6adedfb06 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -202,7 +202,7 @@ template void array_real(const ArrayType& m) m2 = ArrayType::Random(rows, cols), m3(rows, cols), m4 = m1; - + m4 = (m4.abs()==Scalar(0)).select(1,m4); Scalar s1 = internal::random(); @@ -217,9 +217,11 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); +#ifdef EIGEN_HAS_C99_MATH VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1)); VERIFY_IS_APPROX(m1.erf(), erf(m1)); VERIFY_IS_APPROX(m1.erfc(), erfc(m1)); +#endif // EIGEN_HAS_C99_MATH VERIFY_IS_APPROX(m1.arg(), arg(m1)); VERIFY_IS_APPROX(m1.round(), round(m1)); VERIFY_IS_APPROX(m1.floor(), floor(m1)); From 22dd368ea059586de26ceebe77eaf52f3cae02e8 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Thu, 10 Dec 2015 16:14:29 -0500 Subject: [PATCH 307/344] sign(complex) compiles for GPU --- Eigen/src/Core/functors/UnaryFunctors.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index e6c665fb6..e630acc38 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -676,8 +676,13 @@ struct scalar_sign_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op) EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - typename NumTraits::Real aa = std::abs(a); - return (aa==0) ? 
Scalar(0) : (a/aa); + using std::abs; + typedef typename NumTraits::Real real_type; + real_type aa = abs(a); + if (aa==0) + return Scalar(0); + aa = 1./aa; + return Scalar(real(a)*aa, imag(a)*aa ); } //TODO //template From d1862967a89501f0382834e0d128a53ad5764377 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 10 Dec 2015 22:23:21 +0100 Subject: [PATCH 308/344] Make sure ADOLC is recent enough by searching for adtl.h --- cmake/FindAdolc.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/FindAdolc.cmake b/cmake/FindAdolc.cmake index 1a7ff3628..937e54990 100644 --- a/cmake/FindAdolc.cmake +++ b/cmake/FindAdolc.cmake @@ -5,7 +5,7 @@ endif (ADOLC_INCLUDES AND ADOLC_LIBRARIES) find_path(ADOLC_INCLUDES NAMES - adolc/adouble.h + adolc/adtl.h PATHS $ENV{ADOLCDIR} ${INCLUDE_INSTALL_DIR} From df6f54ff63fbf8ec4bd6218d9887351b30dda30f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 10 Dec 2015 22:24:58 +0100 Subject: [PATCH 309/344] Fix storage order of PartialRedux --- Eigen/src/Core/CoreEvaluators.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 42ad452f7..f97dc33de 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -994,7 +994,7 @@ struct evaluator > CoeffReadCost = TraversalSize==Dynamic ? HugeCost : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), - Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&HereditaryBits), + Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&(HereditaryBits&(~RowMajorBit))), Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized }; From b820b097b870f96538f87862bb3cf22d2b3f4b3b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 13:52:05 -0800 Subject: [PATCH 310/344] Created EIGEN_HAS_C99_MATH define as Gael suggested. 
--- Eigen/src/Core/util/Macros.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 8def69610..d375c77dd 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -341,6 +341,13 @@ #define EIGEN_HAVE_RVALUE_REFERENCES #endif +// Does the compiler support C99? +#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ + || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) +#define EIGEN_HAS_C99_MATH 1 +#endif + // Does the compiler support result_of? #if (__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)) #define EIGEN_HAS_STD_RESULT_OF 1 From 6acf2bd4725a3394c40e1b542ae03a9c6fbb9a2c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 17:17:42 -0800 Subject: [PATCH 311/344] Fixed compilation error triggered by MSVC 2008 --- test/packetmath.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 91bb998d0..bf2e3fecc 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -336,7 +336,7 @@ template void packetmath_real() data1[1] = 0; h.store(data2, internal::pexp(h.load(data1))); VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::epsilon()), data2[0]); - VERIFY_IS_EQUAL(std::exp(0), data2[1]); + VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]); data1[0] = (std::numeric_limits::min)(); data1[1] = -(std::numeric_limits::min)(); @@ -401,7 +401,7 @@ template void packetmath_real() data1[1] = 0; h.store(data2, internal::plog(h.load(data1))); VERIFY((numext::isnan)(data2[0])); - VERIFY_IS_EQUAL(std::log(0), data2[1]); + VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]); data1[0] = (std::numeric_limits::min)(); data1[1] = -(std::numeric_limits::min)(); From 4e324ca6ae1ae7b60e18227bbfdde9a0380e90e7 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 20:47:25 -0800 Subject: [PATCH 312/344] Updated 
the cxx11_tensor_assign test to make it compile without support for cxx11 --- unsupported/test/cxx11_tensor_assign.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/unsupported/test/cxx11_tensor_assign.cpp b/unsupported/test/cxx11_tensor_assign.cpp index d16aaf847..e5cf61fe1 100644 --- a/unsupported/test/cxx11_tensor_assign.cpp +++ b/unsupported/test/cxx11_tensor_assign.cpp @@ -29,8 +29,8 @@ static void test_1d() int row_major[6]; memset(col_major, 0, 6*sizeof(int)); memset(row_major, 0, 6*sizeof(int)); - TensorMap> vec3(col_major, 6); - TensorMap> vec4(row_major, 6); + TensorMap > vec3(col_major, 6); + TensorMap > vec4(row_major, 6); vec3 = vec1; vec4 = vec2; @@ -92,8 +92,8 @@ static void test_2d() int row_major[6]; memset(col_major, 0, 6*sizeof(int)); memset(row_major, 0, 6*sizeof(int)); - TensorMap> mat3(row_major, 2, 3); - TensorMap> mat4(col_major, 2, 3); + TensorMap > mat3(row_major, 2, 3); + TensorMap > mat4(col_major, 2, 3); mat3 = mat1; mat4 = mat2; @@ -152,8 +152,8 @@ static void test_3d() int row_major[2*3*7]; memset(col_major, 0, 2*3*7*sizeof(int)); memset(row_major, 0, 2*3*7*sizeof(int)); - TensorMap> mat3(col_major, 2, 3, 7); - TensorMap> mat4(row_major, 2, 3, 7); + TensorMap > mat3(col_major, 2, 3, 7); + TensorMap > mat4(row_major, 2, 3, 7); mat3 = mat1; mat4 = mat2; From 9db8316c936b2d83e2b6484b681b275f9cccae95 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 20:53:44 -0800 Subject: [PATCH 313/344] Updated the cxx11_tensor_custom_op to not require cxx11. 
--- unsupported/test/cxx11_tensor_custom_op.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/unsupported/test/cxx11_tensor_custom_op.cpp b/unsupported/test/cxx11_tensor_custom_op.cpp index 7e33c9580..8baa477cc 100644 --- a/unsupported/test/cxx11_tensor_custom_op.cpp +++ b/unsupported/test/cxx11_tensor_custom_op.cpp @@ -25,7 +25,9 @@ struct InsertZeros { template void eval(const Tensor& input, Output& output, const Device& device) const { - array strides{{2, 2}}; + array strides; + strides[0] = 2; + strides[1] = 2; output.stride(strides).device(device) = input; Eigen::DSizes offsets(1,1); @@ -70,7 +72,8 @@ struct BatchMatMul { Output& output, const Device& device) const { typedef Tensor::DimensionPair DimPair; - array dims({{DimPair(1, 0)}}); + array dims; + dims[0] = DimPair(1, 0); for (int i = 0; i < output.dimension(2); ++i) { output.template chip<2>(i).device(device) = input1.chip<2>(i).contract(input2.chip<2>(i), dims); } @@ -88,9 +91,10 @@ static void test_custom_binary_op() Tensor result = tensor1.customOp(tensor2, BatchMatMul()); for (int i = 0; i < 5; ++i) { typedef Tensor::DimensionPair DimPair; - array dims({{DimPair(1, 0)}}); + array dims; + dims[0] = DimPair(1, 0); Tensor reference = tensor1.chip<2>(i).contract(tensor2.chip<2>(i), dims); - TensorRef> val = result.chip<2>(i); + TensorRef > val = result.chip<2>(i); for (int j = 0; j < 2; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(val(j, k), reference(j, k)); From 8e00ea9a92cfbe849056bc74a1aab34ff8e8a811 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 22:45:10 -0800 Subject: [PATCH 314/344] Fixed the coefficient accessors use for the 2d and 3d case when compiling without cxx11 support. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorMap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 4347bc2ff..5c759af09 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -49,7 +49,7 @@ template class TensorMap : public Tensor IsAligned = ((int(Options_)&Aligned)==Aligned), PacketAccess = (internal::packet_traits::size > 1), Layout = PlainObjectType::Layout, - CoordAccess = true, + CoordAccess = true }; EIGEN_DEVICE_FUNC @@ -158,7 +158,7 @@ template class TensorMap : public Tensor EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const { if (PlainObjectType::Options&RowMajor) { - const Index index = i1 + i0 * m_dimensions[0]; + const Index index = i1 + i0 * m_dimensions[1]; return m_data[index]; } else { const Index index = i0 + i1 * m_dimensions[0]; @@ -169,7 +169,7 @@ template class TensorMap : public Tensor EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const { if (PlainObjectType::Options&RowMajor) { - const Index index = i2 + m_dimensions[1] * (i1 + m_dimensions[0] * i0); + const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); return m_data[index]; } else { const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); @@ -245,7 +245,7 @@ template class TensorMap : public Tensor EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) { if (PlainObjectType::Options&RowMajor) { - const Index index = i1 + i0 * m_dimensions[0]; + const Index index = i1 + i0 * m_dimensions[1]; return m_data[index]; } else { const Index index = i0 + i1 * m_dimensions[0]; @@ -256,7 +256,7 @@ template class TensorMap : public Tensor EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) { if (PlainObjectType::Options&RowMajor) { - const Index index = i2 + m_dimensions[1] * (i1 + m_dimensions[0] * i0); + 
const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); return m_data[index]; } else { const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); From 8d28a161b2f3a8866a7558303514861d2a3b6c69 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 22:53:56 -0800 Subject: [PATCH 315/344] Use the proper accessor to refer to the value of a scalar tensor --- unsupported/test/cxx11_tensor_sugar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/test/cxx11_tensor_sugar.cpp b/unsupported/test/cxx11_tensor_sugar.cpp index 98671a986..adac472cf 100644 --- a/unsupported/test/cxx11_tensor_sugar.cpp +++ b/unsupported/test/cxx11_tensor_sugar.cpp @@ -18,7 +18,7 @@ static void test_comparison_sugar() { #define TEST_TENSOR_EQUAL(e1, e2) \ b = ((e1) == (e2)).all(); \ - VERIFY(b(0)) + VERIFY(b()) #define TEST_OP(op) TEST_TENSOR_EQUAL(t op 0, t op t.constant(0)) From 2d8f2e4042ed8c347d90fb2dacc53a480f7a28b4 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 23:20:04 -0800 Subject: [PATCH 316/344] Made 2 tests compile without cxx11. 
HdG: -- --- unsupported/test/cxx11_tensor_casts.cpp | 4 ++-- unsupported/test/cxx11_tensor_reverse.cpp | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/unsupported/test/cxx11_tensor_casts.cpp b/unsupported/test/cxx11_tensor_casts.cpp index 729e43327..3c6d0d2ff 100644 --- a/unsupported/test/cxx11_tensor_casts.cpp +++ b/unsupported/test/cxx11_tensor_casts.cpp @@ -24,12 +24,12 @@ static void test_simple_cast() cplextensor.setRandom(); chartensor = ftensor.cast(); - cplextensor = ftensor.cast>(); + cplextensor = ftensor.cast >(); for (int i = 0; i < 20; ++i) { for (int j = 0; j < 30; ++j) { VERIFY_IS_EQUAL(chartensor(i,j), static_cast(ftensor(i,j))); - VERIFY_IS_EQUAL(cplextensor(i,j), static_cast>(ftensor(i,j))); + VERIFY_IS_EQUAL(cplextensor(i,j), static_cast >(ftensor(i,j))); } } } diff --git a/unsupported/test/cxx11_tensor_reverse.cpp b/unsupported/test/cxx11_tensor_reverse.cpp index f96c21fa3..b35b8d29e 100644 --- a/unsupported/test/cxx11_tensor_reverse.cpp +++ b/unsupported/test/cxx11_tensor_reverse.cpp @@ -114,10 +114,18 @@ static void test_expr_reverse(bool LValue) Tensor result(2,3,5,7); - array src_slice_dim{{2,3,1,7}}; - array src_slice_start{{0,0,0,0}}; - array dst_slice_dim{{2,3,1,7}}; - array dst_slice_start{{0,0,0,0}}; + array src_slice_dim; + src_slice_dim[0] = 2; + src_slice_dim[1] = 3; + src_slice_dim[2] = 1; + src_slice_dim[3] = 7; + array src_slice_start; + src_slice_start[0] = 0; + src_slice_start[1] = 0; + src_slice_start[2] = 0; + src_slice_start[3] = 0; + array dst_slice_dim = src_slice_dim; + array dst_slice_start = src_slice_start; for (int i = 0; i < 5; ++i) { if (LValue) { From 6af52a1227f204e72d9f8473deb3bb648a665149 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 10 Dec 2015 23:31:12 -0800 Subject: [PATCH 317/344] Fixed a typo in the constructor of tensors of rank 5. 
--- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 6d357545c..87ac8f5aa 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -78,7 +78,7 @@ class Tensor : public TensorBase0) & !(Options_&DontAlign), PacketAccess = (internal::packet_traits::size > 1), Layout = Options_ & RowMajor ? RowMajor : ColMajor, - CoordAccess = true, + CoordAccess = true }; static const int Options = Options_; @@ -368,7 +368,7 @@ class Tensor : public TensorBase(dim1, dim2, dim3, dim4, dim5)) + : m_storage(dim1*dim2*dim3*dim4*dim5, array(dim1, dim2, dim3, dim4, dim5)) { EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } From 836da91b3fa6c4b2a2413268effd7e481ec8b066 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 11 Dec 2015 10:06:28 +0100 Subject: [PATCH 318/344] Fix unit tests wrt EIGEN_DEFAULT_TO_ROW_MAJOR --- test/is_same_dense.cpp | 11 ++++++----- test/nesting_ops.cpp | 7 ++++--- test/vectorization_logic.cpp | 5 ++++- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/test/is_same_dense.cpp b/test/is_same_dense.cpp index 318ba8717..6d7904bac 100644 --- a/test/is_same_dense.cpp +++ b/test/is_same_dense.cpp @@ -11,9 +11,10 @@ void test_is_same_dense() { - MatrixXd m1(10,10); - Ref ref_m1(m1); - Ref const_ref_m1(m1); + typedef Matrix ColMatrixXd; + ColMatrixXd m1(10,10); + Ref ref_m1(m1); + Ref const_ref_m1(m1); VERIFY(is_same_dense(m1,m1)); VERIFY(is_same_dense(m1,ref_m1)); VERIFY(is_same_dense(const_ref_m1,m1)); @@ -22,9 +23,9 @@ void test_is_same_dense() VERIFY(is_same_dense(m1.block(0,0,m1.rows(),m1.cols()),m1)); VERIFY(!is_same_dense(m1.row(0),m1.col(0))); - Ref const_ref_m1_row(m1.row(1)); + Ref const_ref_m1_row(m1.row(1)); VERIFY(!is_same_dense(m1.row(1),const_ref_m1_row)); - Ref const_ref_m1_col(m1.col(1)); + Ref 
const_ref_m1_col(m1.col(1)); VERIFY(is_same_dense(m1.col(1),const_ref_m1_col)); } diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp index 76a63400c..2f5025305 100644 --- a/test/nesting_ops.cpp +++ b/test/nesting_ops.cpp @@ -51,6 +51,7 @@ template void run_nesting_ops_2(const MatrixType& _m) Index rows = _m.rows(); Index cols = _m.cols(); MatrixType m1 = MatrixType::Random(rows,cols); + Matrix m2; if((MatrixType::SizeAtCompileTime==Dynamic)) { @@ -79,9 +80,9 @@ template void run_nesting_ops_2(const MatrixType& _m) } VERIFY( verify_eval_type<2>(m1+m1, m1+m1) ); VERIFY( verify_eval_type<3>(m1+m1, m1) ); - VERIFY( verify_eval_type<1>(m1*m1.transpose(), m1) ); - VERIFY( verify_eval_type<1>(m1*(m1+m1).transpose(), m1) ); - VERIFY( verify_eval_type<2>(m1*m1.transpose(), m1) ); + VERIFY( verify_eval_type<1>(m1*m1.transpose(), m2) ); + VERIFY( verify_eval_type<1>(m1*(m1+m1).transpose(), m2) ); + VERIFY( verify_eval_type<2>(m1*m1.transpose(), m2) ); VERIFY( verify_eval_type<1>(m1+m1*m1, m1) ); VERIFY( verify_eval_type<1>(m1.template triangularView().solve(m1), m1) ); diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index da60a2f3a..35fbb9781 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -1,12 +1,15 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#undef EIGEN_DEFAULT_TO_ROW_MAJOR +#endif #define EIGEN_DEBUG_ASSIGN #include "main.h" #include From 79c1e6d0a63883cec691eaebcdbf0935ad557f70 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 11 Dec 2015 10:55:07 +0100 Subject: [PATCH 319/344] Fix compilation of MKL support. 
--- .../src/Core/products/SelfadjointMatrixVector_MKL.h | 13 ++++++------- Eigen/src/Core/util/DisableStupidWarnings.h | 4 +++- Eigen/src/Eigenvalues/ComplexSchur_MKL.h | 8 ++++---- Eigen/src/Eigenvalues/RealSchur_MKL.h | 6 +++--- Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h | 8 ++++---- Eigen/src/QR/ColPivHouseholderQR_MKL.h | 4 ++-- 6 files changed, 22 insertions(+), 21 deletions(-) mode change 100644 => 100755 Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h mode change 100644 => 100755 Eigen/src/Core/util/DisableStupidWarnings.h mode change 100644 => 100755 Eigen/src/Eigenvalues/ComplexSchur_MKL.h mode change 100644 => 100755 Eigen/src/Eigenvalues/RealSchur_MKL.h mode change 100644 => 100755 Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h mode change 100644 => 100755 Eigen/src/QR/ColPivHouseholderQR_MKL.h diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h old mode 100644 new mode 100755 index 86684b66d..a08f385bc --- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h @@ -52,16 +52,16 @@ template { \ static void run( \ Index size, const Scalar* lhs, Index lhsStride, \ - const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ + const Scalar* _rhs, Scalar* res, Scalar alpha) { \ enum {\ IsColMajor = StorageOrder==ColMajor \ }; \ if (IsColMajor == ConjugateLhs) {\ selfadjoint_matrix_vector_product::run( \ - size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + size, lhs, lhsStride, _rhs, res, alpha); \ } else {\ selfadjoint_matrix_vector_product_symv::run( \ - size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ + size, lhs, lhsStride, _rhs, res, alpha); \ }\ } \ }; \ @@ -79,13 +79,13 @@ typedef Matrix SYMVVector;\ \ static void run( \ Index size, const EIGTYPE* lhs, Index lhsStride, \ -const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ +const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \ { \ 
enum {\ IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \ IsLower = UpLo == Lower ? 1 : 0 \ }; \ - MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \ + MKL_INT n=size, lda=lhsStride, incx=1, incy=1; \ MKLTYPE alpha_, beta_; \ const EIGTYPE *x_ptr, myone(1); \ char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \ @@ -93,10 +93,9 @@ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ assign_scalar_eig2mkl(beta_, myone); \ SYMVVector x_tmp; \ if (ConjugateRhs) { \ - Map > map_x(_rhs,size,1,InnerStride<>(incx)); \ + Map map_x(_rhs,size,1); \ x_tmp=map_x.conjugate(); \ x_ptr=x_tmp.data(); \ - incx=1; \ } else x_ptr=_rhs; \ MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ }\ diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h old mode 100644 new mode 100755 index 46c141ad5..747232938 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -25,10 +25,12 @@ // typedef that may be a reference type. // 279 - controlling expression is constant // ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case. 
+ // 1684 - conversion from pointer to same-sized integral type (potential portability problem) + // 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning push #endif - #pragma warning disable 2196 279 + #pragma warning disable 2196 279 1684 2259 #elif defined __clang__ // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant // this is really a stupid warning as it warns on compile-time expressions involving enums diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h old mode 100644 new mode 100755 index 27aed923c..931573a4e --- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h @@ -40,9 +40,9 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_SCHUR_COMPLEX(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ -template<> inline \ +template<> template inline \ ComplexSchur >& \ -ComplexSchur >::compute(const Matrix& matrix, bool computeU) \ +ComplexSchur >::compute(const EigenBase& matrix, bool computeU) \ { \ typedef Matrix MatrixType; \ typedef MatrixType::RealScalar RealScalar; \ @@ -53,7 +53,7 @@ ComplexSchur >::compute(const Matri m_matUisUptodate = false; \ if(matrix.cols() == 1) \ { \ - m_matT = matrix.cast(); \ + m_matT = matrix.template cast(); \ if(computeU) m_matU = ComplexMatrixType::Identity(1,1); \ m_info = Success; \ m_isInitialized = true; \ @@ -61,7 +61,6 @@ ComplexSchur >::compute(const Matri return *this; \ } \ lapack_int n = matrix.cols(), sdim, info; \ - lapack_int lda = matrix.outerStride(); \ lapack_int matrix_order = MKLCOLROW; \ char jobvs, sort='N'; \ LAPACK_##MKLPREFIX_U##_SELECT1 select = 0; \ @@ -69,6 +68,7 @@ ComplexSchur >::compute(const Matri m_matU.resize(n, n); \ lapack_int ldvs = m_matU.outerStride(); \ 
m_matT = matrix; \ + lapack_int lda = m_matT.outerStride(); \ Matrix w; \ w.resize(n, 1);\ info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)w.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ diff --git a/Eigen/src/Eigenvalues/RealSchur_MKL.h b/Eigen/src/Eigenvalues/RealSchur_MKL.h old mode 100644 new mode 100755 index c3089b468..e80926400 --- a/Eigen/src/Eigenvalues/RealSchur_MKL.h +++ b/Eigen/src/Eigenvalues/RealSchur_MKL.h @@ -40,14 +40,13 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_SCHUR_REAL(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ -template<> inline \ +template<> template inline \ RealSchur >& \ -RealSchur >::compute(const Matrix& matrix, bool computeU) \ +RealSchur >::compute(const EigenBase& matrix, bool computeU) \ { \ eigen_assert(matrix.cols() == matrix.rows()); \ \ lapack_int n = matrix.cols(), sdim, info; \ - lapack_int lda = matrix.outerStride(); \ lapack_int matrix_order = MKLCOLROW; \ char jobvs, sort='N'; \ LAPACK_##MKLPREFIX_U##_SELECT2 select = 0; \ @@ -55,6 +54,7 @@ RealSchur >::compute(const Matrix wr, wi; \ wr.resize(n, 1); wi.resize(n, 1); \ info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)wr.data(), (MKLTYPE*)wi.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h old mode 100644 new mode 100755 index 17c0dadd2..3499dc78a --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h @@ -40,9 +40,9 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_EIG_SELFADJ(EIGTYPE, MKLTYPE, MKLRTYPE, MKLNAME, EIGCOLROW, MKLCOLROW ) \ -template<> inline \ +template<> template inline \ SelfAdjointEigenSolver >& \ 
-SelfAdjointEigenSolver >::compute(const Matrix& matrix, int options) \ +SelfAdjointEigenSolver >::compute(const EigenBase& matrix, int options) \ { \ eigen_assert(matrix.cols() == matrix.rows()); \ eigen_assert((options&~(EigVecMask|GenEigMask))==0 \ @@ -56,7 +56,7 @@ SelfAdjointEigenSolver >::compute(c \ if(n==1) \ { \ - m_eivalues.coeffRef(0,0) = numext::real(matrix.coeff(0,0)); \ + m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0)); \ if(computeEigenvectors) m_eivec.setOnes(n,n); \ m_info = Success; \ m_isInitialized = true; \ @@ -64,7 +64,7 @@ SelfAdjointEigenSolver >::compute(c return *this; \ } \ \ - lda = matrix.outerStride(); \ + lda = m_eivec.outerStride(); \ matrix_order=MKLCOLROW; \ char jobz, uplo='L'/*, range='A'*/; \ jobz = computeEigenvectors ? 'V' : 'N'; \ diff --git a/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/Eigen/src/QR/ColPivHouseholderQR_MKL.h old mode 100644 new mode 100755 index 7b6ba0a5e..fce4df08c --- a/Eigen/src/QR/ColPivHouseholderQR_MKL.h +++ b/Eigen/src/QR/ColPivHouseholderQR_MKL.h @@ -41,10 +41,10 @@ namespace Eigen { /** \internal Specialization for the data types supported by MKL */ #define EIGEN_MKL_QR_COLPIV(EIGTYPE, MKLTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ -template<> inline \ +template<> template inline \ ColPivHouseholderQR >& \ ColPivHouseholderQR >::compute( \ - const Matrix& matrix) \ + const EigenBase& matrix) \ \ { \ using std::abs; \ From 30b5c4cd14bcb9998916e6d782bc3b06465ec510 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 11 Dec 2015 10:59:39 +0100 Subject: [PATCH 320/344] Remove useless "explicit", and fix inline/static order. 
--- Eigen/src/Core/AssignEvaluator.h | 4 ++-- Eigen/src/Core/VectorwiseOp.h | 2 +- Eigen/src/Eigenvalues/GeneralizedEigenSolver.h | 2 +- Eigen/src/Eigenvalues/RealQZ.h | 2 +- Eigen/src/SVD/JacobiSVD.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) mode change 100644 => 100755 Eigen/src/Core/AssignEvaluator.h mode change 100644 => 100755 Eigen/src/Core/VectorwiseOp.h mode change 100644 => 100755 Eigen/src/Eigenvalues/GeneralizedEigenSolver.h mode change 100644 => 100755 Eigen/src/Eigenvalues/RealQZ.h mode change 100644 => 100755 Eigen/src/SVD/JacobiSVD.h diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h old mode 100644 new mode 100755 index db3bef38d..9dfffbcc4 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -606,7 +606,7 @@ public: assignPacket(row, col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index rowIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 @@ -615,7 +615,7 @@ public: : inner; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index colIndexByOuterInner(Index outer, Index inner) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) { typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 
0 diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h old mode 100644 new mode 100755 index dbc272dae..483f71909 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -115,7 +115,7 @@ struct member_lpnorm { typedef ResultType result_type; template struct Cost { enum { value = (Size+5) * NumTraits::MulCost + (Size-1)*NumTraits::AddCost }; }; - EIGEN_DEVICE_FUNC explicit member_lpnorm() {} + EIGEN_DEVICE_FUNC member_lpnorm() {} template EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const { return mat.template lpNorm

(); } diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h old mode 100644 new mode 100755 index e2e28cd4a..a9d6790d5 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -145,7 +145,7 @@ template class GeneralizedEigenSolver * * \sa compute() */ - explicit GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true) + GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true) : m_eivec(A.rows(), A.cols()), m_alphas(A.cols()), m_betas(A.cols()), diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h old mode 100644 new mode 100755 index 02ebb7d17..a62071d42 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -101,7 +101,7 @@ namespace Eigen { * * This constructor calls compute() to compute the QZ decomposition. */ - explicit RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) : + RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) : m_S(A.rows(),A.cols()), m_T(A.rows(),A.cols()), m_Q(A.rows(),A.cols()), diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h old mode 100644 new mode 100755 index e29d36cf2..cb918860c --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -539,7 +539,7 @@ template class JacobiSVD * according to the specified problem size. 
* \sa JacobiSVD() */ - explicit JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0) + JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0) { allocate(rows, cols, computationOptions); } From bcb4f126a735e68d7d2dcd08c853a89a31b3440e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 11 Dec 2015 11:11:00 +0100 Subject: [PATCH 321/344] Fix compilation of PardisoSupport --- Eigen/PardisoSupport | 2 -- Eigen/src/PardisoSupport/PardisoSupport.h | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) mode change 100644 => 100755 Eigen/PardisoSupport diff --git a/Eigen/PardisoSupport b/Eigen/PardisoSupport old mode 100644 new mode 100755 index 7dc9c7de0..340edf51f --- a/Eigen/PardisoSupport +++ b/Eigen/PardisoSupport @@ -14,8 +14,6 @@ #include -#include - /** \ingroup Support_modules * \defgroup PardisoSupport_Module PardisoSupport module * diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h index 9c18eb9b9..7c238ce3c 100755 --- a/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/Eigen/src/PardisoSupport/PardisoSupport.h @@ -117,7 +117,9 @@ class PardisoImpl : public SparseSolverBase typedef Matrix IntColVectorType; typedef Array ParameterType; enum { - ScalarIsComplex = NumTraits::IsComplex + ScalarIsComplex = NumTraits::IsComplex, + ColsAtCompileTime = Dynamic, + MaxColsAtCompileTime = Dynamic }; PardisoImpl() From 7385e6e2ef944a4be9464760066ab072ed315e1c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 11 Dec 2015 11:11:19 +0100 Subject: [PATCH 322/344] Remove useless explicit --- Eigen/src/SparseLU/SparseLU.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) mode change 100644 => 100755 Eigen/src/SparseLU/SparseLU.h diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h old mode 100644 new mode 100755 index acd3ad100..d33d27f46 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -101,7 +101,8 @@ class SparseLU : public 
SparseSolverBase >, { initperfvalues(); } - explicit SparseLU(const MatrixType& matrix):m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) + explicit SparseLU(const MatrixType& matrix) + : m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) { initperfvalues(); compute(matrix); @@ -719,7 +720,7 @@ template struct SparseLUMatrixUReturnType : internal::no_assignment_operator { typedef typename MatrixLType::Scalar Scalar; - explicit SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) + SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) : m_mapL(mapL),m_mapU(mapU) { } Index rows() { return m_mapL.rows(); } From 4519fd5d40031839ca8a9de4cc177bcbda95e360 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 11 Dec 2015 11:11:38 +0100 Subject: [PATCH 323/344] Fix MKL compilation issue --- Eigen/src/QR/ColPivHouseholderQR_MKL.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/Eigen/src/QR/ColPivHouseholderQR_MKL.h index fce4df08c..1203d0d36 100755 --- a/Eigen/src/QR/ColPivHouseholderQR_MKL.h +++ b/Eigen/src/QR/ColPivHouseholderQR_MKL.h @@ -52,9 +52,9 @@ ColPivHouseholderQR Date: Fri, 11 Dec 2015 11:43:49 +0100 Subject: [PATCH 324/344] bug #1132: add EIGEN_MAPBASE_PLUGIN --- Eigen/src/Core/MapBase.h | 4 ++++ doc/PreprocessorDirectives.dox | 1 + 2 files changed, 5 insertions(+) diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index ae28d4db6..75a80daaa 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -155,6 +155,10 @@ template class MapBase checkSanity(); } + #ifdef EIGEN_MAPBASE_PLUGIN + #include EIGEN_MAPBASE_PLUGIN + #endif + protected: EIGEN_DEVICE_FUNC diff --git a/doc/PreprocessorDirectives.dox b/doc/PreprocessorDirectives.dox index 76ce2eb99..7cde1a36f 100644 --- a/doc/PreprocessorDirectives.dox +++ b/doc/PreprocessorDirectives.dox 
@@ -106,6 +106,7 @@ following macros are supported; none of them are defined by default. - \b EIGEN_MATRIX_PLUGIN - filename of plugin for extending the Matrix class. - \b EIGEN_MATRIXBASE_PLUGIN - filename of plugin for extending the MatrixBase class. - \b EIGEN_PLAINOBJECTBASE_PLUGIN - filename of plugin for extending the PlainObjectBase class. + - \b EIGEN_MAPBASE_PLUGIN - filename of plugin for extending the MapBase class. - \b EIGEN_QUATERNION_PLUGIN - filename of plugin for extending the Quaternion class. - \b EIGEN_QUATERNIONBASE_PLUGIN - filename of plugin for extending the QuaternionBase class. - \b EIGEN_SPARSEMATRIX_PLUGIN - filename of plugin for extending the SparseMatrix class. From b60a8967f549250c2701112a029853e2d2d8ae64 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 11 Dec 2015 11:59:11 +0100 Subject: [PATCH 325/344] bug #1134: fix JacobiSVD pre-allocation (grafted from f22036f5f8bbaa349e090327d246c817bac94ee4 ) --- Eigen/src/SVD/JacobiSVD.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index e29d36cf2..da1dbf5e7 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -666,7 +666,7 @@ void JacobiSVD::allocate(Index rows, Index cols, u if(m_cols>m_rows) m_qr_precond_morecols.allocate(*this); if(m_rows>m_cols) m_qr_precond_morerows.allocate(*this); - if(m_cols!=m_cols) m_scaledMatrix.resize(rows,cols); + if(m_rows!=m_cols) m_scaledMatrix.resize(rows,cols); } template From 4d708457d07c0b9169d9e8336bb4d090791acd5d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 11 Dec 2015 23:07:22 +0100 Subject: [PATCH 326/344] Increase axpy vector size --- bench/btl/generic_bench/bench_parameter.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/btl/generic_bench/bench_parameter.hh b/bench/btl/generic_bench/bench_parameter.hh index 0f62bd421..2b01149f9 100644 --- a/bench/btl/generic_bench/bench_parameter.hh +++ 
b/bench/btl/generic_bench/bench_parameter.hh @@ -29,7 +29,7 @@ // min vector size for axpy bench #define MIN_AXPY 5 // max vector size for axpy bench -#define MAX_AXPY 1000000 +#define MAX_AXPY 3000000 // min matrix size for matrix vector product bench #define MIN_MV 5 // max matrix size for matrix vector product bench From 4483c0fdf6bbab9133211b116a53271e5dcdeec5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 11 Dec 2015 23:29:53 +0100 Subject: [PATCH 327/344] Fix unused variable warning. --- Eigen/src/Core/SpecialFunctions.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index 05973e372..d43cf23a1 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -21,7 +21,7 @@ template struct lgamma_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + static EIGEN_STRONG_INLINE Scalar run(const Scalar&) { EIGEN_STATIC_ASSERT((internal::is_same::value == false), THIS_TYPE_IS_NOT_SUPPORTED); @@ -59,7 +59,7 @@ template struct erf_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + static EIGEN_STRONG_INLINE Scalar run(const Scalar&) { EIGEN_STATIC_ASSERT((internal::is_same::value == false), THIS_TYPE_IS_NOT_SUPPORTED); @@ -97,7 +97,7 @@ template struct erfc_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar& x) + static EIGEN_STRONG_INLINE Scalar run(const Scalar&) { EIGEN_STATIC_ASSERT((internal::is_same::value == false), THIS_TYPE_IS_NOT_SUPPORTED); From 140f3a02a825e9c3d72f1adf7ff3cc2e49dffea9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 11 Dec 2015 23:31:21 +0100 Subject: [PATCH 328/344] Fix MKL wrapper for ComplexSchur --- Eigen/src/Eigenvalues/ComplexSchur_MKL.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h index 
931573a4e..e20c3725b 100755 --- a/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +++ b/Eigen/src/Eigenvalues/ComplexSchur_MKL.h @@ -53,7 +53,7 @@ ComplexSchur >::compute(const Eigen m_matUisUptodate = false; \ if(matrix.cols() == 1) \ { \ - m_matT = matrix.template cast(); \ + m_matT = matrix.derived().template cast(); \ if(computeU) m_matU = ComplexMatrixType::Identity(1,1); \ m_info = Success; \ m_isInitialized = true; \ From 75e19fc7cabfdb447dc740ee65399089f189e1fe Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 14 Dec 2015 15:12:55 -0800 Subject: [PATCH 329/344] Marked the tensor constructors as EIGEN_DEVICE_FUNC: This makes it possible to call them from a CUDA kernel. --- unsupported/Eigen/CXX11/src/Tensor/Tensor.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 87ac8f5aa..ad525bac8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -340,34 +340,34 @@ class Tensor : public TensorBase - inline Tensor(Index firstDimension, IndexTypes... otherDimensions) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions) : m_storage(internal::array_prod(array{{firstDimension, otherDimensions...}}), array{{firstDimension, otherDimensions...}}) { // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. 
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } #else - inline explicit Tensor(Index dim1) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1) : m_storage(dim1, array(dim1)) { EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - inline explicit Tensor(Index dim1, Index dim2) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2) : m_storage(dim1*dim2, array(dim1, dim2)) { EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - inline explicit Tensor(Index dim1, Index dim2, Index dim3) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2, Index dim3) : m_storage(dim1*dim2*dim3, array(dim1, dim2, dim3)) { EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - inline explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4) : m_storage(dim1*dim2*dim3*dim4, array(dim1, dim2, dim3, dim4)) { EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) } - inline explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_storage(dim1*dim2*dim3*dim4*dim5, array(dim1, dim2, dim3, dim4, dim5)) { EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -375,7 +375,7 @@ class Tensor : public TensorBase& dimensions) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array& dimensions) : m_storage(internal::array_prod(dimensions), dimensions) { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED From 17352e27928ba74c2b4131f5905f9d90ace805b2 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 14 Dec 2015 15:20:31 -0800 Subject: [PATCH 330/344] Made the entire TensorFixedSize api callable from a CUDA kernel. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index bf930f6b8..a4d6ce6b3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -68,7 +68,7 @@ class TensorFixedSize : public TensorBase - inline const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const { // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -100,7 +100,7 @@ class TensorFixedSize : public TensorBase - inline Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) { // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -132,7 +132,7 @@ class TensorFixedSize : public TensorBase - inline const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const { // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -171,7 +171,7 @@ class TensorFixedSize : public TensorBase - inline Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... 
otherIndices) { // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -221,7 +221,7 @@ class TensorFixedSize : public TensorBase Date: Tue, 15 Dec 2015 11:34:52 +0100 Subject: [PATCH 331/344] bug #1136: Protect isinf for Intel compilers. Also don't distinguish GCC from ICC and don't rely on EIGEN_NOT_A_MACRO, which might not be defined when including this. --- unsupported/test/mpreal/mpreal.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h index c4f6cf0cb..9b0cf7268 100644 --- a/unsupported/test/mpreal/mpreal.h +++ b/unsupported/test/mpreal/mpreal.h @@ -72,14 +72,12 @@ #define MPREAL_VERSION_STRING "3.6.2" // Detect compiler using signatures from http://predef.sourceforge.net/ -#if defined(__GNUC__) && defined(__INTEL_COMPILER) - #define IsInf(x) isinf(x) // Intel ICC compiler on Linux - +#if defined(__GNUC__) + #define IsInf(x) (isinf)(x) // GNU C++/Intel ICC compiler on Linux #elif defined(_MSC_VER) // Microsoft Visual C++ #define IsInf(x) (!_finite(x)) - #else - #define IsInf(x) std::isinf EIGEN_NOT_A_MACRO (x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance + #define IsInf(x) (std::isinf)(x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance #endif // A Clang feature extension to determine compiler features. 
@@ -3103,4 +3101,4 @@ namespace std } -#endif /* __MPREAL_H__ */ \ No newline at end of file +#endif /* __MPREAL_H__ */ From 35d8725c73cfcce45ebb774e25e51bd5ab5e61b7 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 16 Dec 2015 10:14:24 +0100 Subject: [PATCH 332/344] Disable AutoDiffScalar generic copy ctor for non compatible scalar types (fix ambiguous template instantiation) --- unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) mode change 100644 => 100755 unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h old mode 100644 new mode 100755 index 8b58b512b..e30ad5b6d --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -99,7 +99,11 @@ class AutoDiffScalar {} template - AutoDiffScalar(const AutoDiffScalar& other) + AutoDiffScalar(const AutoDiffScalar& other +#ifndef EIGEN_PARSED_BY_DOXYGEN + , typename internal::enable_if::value,void*>::type = 0 +#endif + ) : m_value(other.value()), m_derivatives(other.derivatives()) {} @@ -127,6 +131,14 @@ class AutoDiffScalar return *this; } + inline AutoDiffScalar& operator=(const Scalar& other) + { + m_value = other; + if(m_derivatives.size()>0) + m_derivatives.setZero(); + return *this; + } + // inline operator const Scalar& () const { return m_value; } // inline operator Scalar& () { return m_value; } From ae8b217a01f07711aa2e57f6b3cf93da77d6d82a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 16 Dec 2015 10:47:03 +0100 Subject: [PATCH 333/344] Update doc to make it clear that only SuperLU 4.x is supported --- Eigen/SuperLUSupport | 2 ++ Eigen/src/SuperLUSupport/SuperLUSupport.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Eigen/SuperLUSupport b/Eigen/SuperLUSupport index 0ae9f3fdf..113f58ee5 100644 --- a/Eigen/SuperLUSupport +++ b/Eigen/SuperLUSupport @@ 
-43,6 +43,8 @@ namespace Eigen { struct SluMatrix; } * - class SuperLU: a supernodal sequential LU factorization. * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). * + * \warning This wrapper is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. + * * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. * * \code diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index b20da37f7..fd2b26581 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -452,6 +452,8 @@ class SuperLUBase : public SparseSolverBase * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * + * \warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. + * * \implsparsesolverconcept * * \sa \ref TutorialSparseDirectSolvers @@ -801,7 +803,7 @@ typename SuperLU::Scalar SuperLU::determinant() const * This class allows to solve for an approximate solution of A.X = B sparse linear problems via an incomplete LU factorization * using the SuperLU library. This class is aimed to be used as a preconditioner of the iterative linear solvers. * - * \warning This class requires SuperLU 4 or later. + * \warning This class is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. 
* * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * From 49d96aee6448d67edbb0382fefca746304c5baaa Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Wed, 16 Dec 2015 11:37:16 +0100 Subject: [PATCH 334/344] bug #1120: Make sure that SuperLU version is checked --- cmake/FindSuperLU.cmake | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake index 259ed7320..e4142fe4d 100644 --- a/cmake/FindSuperLU.cmake +++ b/cmake/FindSuperLU.cmake @@ -60,11 +60,21 @@ endif() cmake_pop_check_state() +if(SuperLU_FIND_VERSION) + if(${SUPERLU_VERSION_VAR} VERSION_LESS ${SuperLU_FIND_VERSION}) + set(SUPERLU_VERSION_OK FALSE) + else() + set(SUPERLU_VERSION_OK TRUE) + endif() +else() + set(SUPERLU_VERSION_OK TRUE) +endif() + endif() include(FindPackageHandleStandardArgs) find_package_handle_standard_args(SUPERLU - REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES + REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES SUPERLU_VERSION_OK VERSION_VAR SUPERLU_VERSION_VAR) mark_as_advanced(SUPERLU_INCLUDES SUPERLU_LIBRARIES) From 9f9de1aaa9a508fc6c94ddacd12b9107462f688f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 16 Dec 2015 21:48:48 +0100 Subject: [PATCH 335/344] bump to 3.3-beta1 --- Eigen/src/Core/util/Macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index e0bc1689d..9b4f8faa7 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 2 -#define EIGEN_MINOR_VERSION 91 +#define EIGEN_MINOR_VERSION 92 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ From 55aef139ffb180fbe8106f7061e028fcf272d876 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 16 Dec 2015 21:49:02 +0100 Subject: [PATCH 
336/344] Added tag 3.3-beta1 for changeset 9f9de1aaa9a508fc6c94ddacd12b9107462f688f From 2ca55a3ae45e1b5137c94267274465bf509f7c72 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 16 Dec 2015 20:45:58 -0800 Subject: [PATCH 337/344] Fixed some compilation error triggered by the tensor code with msvc 2008 --- unsupported/Eigen/CXX11/Tensor | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index c681d3c20..7481a9ddb 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -28,14 +28,22 @@ #include #include + +#ifdef _WIN32 +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else #include +#endif #if __cplusplus > 199711 #include #endif #ifdef _WIN32 -#include +#include #elif defined(__APPLE__) #include #else From 40e6250fc3737ff76224b04c94c2de3ce0d51607 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 17 Dec 2015 13:29:08 -0800 Subject: [PATCH 338/344] Made it possible to run tensor chipping operations on CUDA devices --- unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index c9fa39e51..abc3c92ca 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -50,7 +50,7 @@ struct nested, 1, typename eval struct DimensionId { - DimensionId(DenseIndex dim) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) { eigen_assert(dim == DimId); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { @@ -60,7 +60,7 @@ struct DimensionId template <> struct DimensionId { - DimensionId(DenseIndex dim) : actual_dim(dim) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) { 
eigen_assert(dim >= 0); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { From 4aac55f684d9bd36b5f855fa5a8c2f17ca3094c9 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 17 Dec 2015 13:39:01 -0800 Subject: [PATCH 339/344] Silenced some compilation warnings triggered by nvcc --- .../Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 7 +++++-- .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 16 ++++++++++++---- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 6 +++--- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index c76d1ee3f..4d7570077 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -238,11 +238,14 @@ struct GpuDevice { }; - +#ifndef __CUDA_ARCH__ #define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \ (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \ assert(cudaGetLastError() == cudaSuccess); - +#else +#define LAUNCH_CUDA_KERNEL(...) \ + eigen_assert(false && "Cannot launch a kernel from another kernel"); +#endif // FIXME: Should be device and kernel specific. 
#ifdef __CUDACC__ diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index d93e1de1b..c28078882 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -156,14 +156,14 @@ template class TensorExecutor { public: typedef typename Expression::Index Index; - static void run(const Expression& expr, const GpuDevice& device); + EIGEN_DEVICE_FUNC static void run(const Expression& expr, const GpuDevice& device); }; template class TensorExecutor { public: typedef typename Expression::Index Index; - static void run(const Expression& expr, const GpuDevice& device); + EIGEN_DEVICE_FUNC static void run(const Expression& expr, const GpuDevice& device); }; #if defined(__CUDACC__) @@ -213,8 +213,9 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) { /*static*/ template -inline void TensorExecutor::run(const Expression& expr, const GpuDevice& device) +EIGEN_DEVICE_FUNC inline void TensorExecutor::run(const Expression& expr, const GpuDevice& device) { +#ifndef __CUDA_ARCH__ TensorEvaluator evaluator(expr, device); const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) @@ -227,13 +228,17 @@ inline void TensorExecutor::run(const Expression& LAUNCH_CUDA_KERNEL((EigenMetaKernel_NonVectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); } evaluator.cleanup(); +#else + eigen_assert(false && "Cannot launch a kernel from another kernel"); +#endif } /*static*/ template -inline void TensorExecutor::run(const Expression& expr, const GpuDevice& device) +EIGEN_DEVICE_FUNC inline void TensorExecutor::run(const Expression& expr, const GpuDevice& device) { +#ifndef __CUDA_ARCH__ TensorEvaluator evaluator(expr, device); const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) @@ -246,6 +251,9 @@ inline void TensorExecutor::run(const Expression& e 
LAUNCH_CUDA_KERNEL((EigenMetaKernel_Vectorizable, Index>), num_blocks, block_size, 0, device, evaluator, size); } evaluator.cleanup(); +#else + eigen_assert(false && "Cannot launch a kernel from another kernel"); +#endif } #endif // __CUDACC__ diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index bd15295b8..aaa877185 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -454,7 +454,7 @@ struct TensorEvaluator, Device> input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; } } - + int outputIndex = 0; int reduceIndex = 0; for (int i = 0; i < NumInputDims; ++i) { @@ -473,13 +473,13 @@ struct TensorEvaluator, Device> m_preservedStrides[0] = internal::array_prod(input_dims); } } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } typedef typename internal::remove_const::type CoeffReturnType; typedef typename internal::remove_const::type PacketReturnType; - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { m_impl.evalSubExprsIfNeeded(NULL); // Use the FullReducer if possible. From 8dd17cbe80ef460e9fbd562d6de6ae19b264caea Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 17 Dec 2015 14:00:33 -0800 Subject: [PATCH 340/344] Fixed a clang compilation warning triggered by the use of arrays of size 0. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 785321666..f28a9699d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -101,13 +101,18 @@ bool operator!=(const Tuple& x, const Tuple& y) { #ifdef EIGEN_HAS_SFINAE -namespace internal{ +namespace internal { template EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array customIndices2Array(IndexType& idx, numeric_list) { return { idx[Is]... }; } + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType&, numeric_list) { + return array(); + } /** Make an array (for index/dimensions) out of a custom index */ template From 3abd8470caf60473851f0c5b40ed8abff5c03931 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 18 Dec 2015 14:18:59 +0100 Subject: [PATCH 341/344] bug #1140: remove custom definition and use of _mm256_setr_m128 --- Eigen/src/Core/arch/AVX/MathFunctions.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index c4bd6bd53..9ced9b717 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -10,11 +10,6 @@ #ifndef EIGEN_MATH_FUNCTIONS_AVX_H #define EIGEN_MATH_FUNCTIONS_AVX_H -// For some reason, this function didn't make it into the avxintirn.h -// used by the compiler, so we'll just wrap it. 
-#define _mm256_setr_m128(lo, hi) \ - _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1) - /* The sin, cos, exp, and log functions of this file are loosely derived from * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ */ @@ -63,7 +58,7 @@ psin(const Packet8f& _x) { _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 0), 31); __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 1), 31); - Packet8i sign_flip_mask = _mm256_setr_m128(lo, hi); + Packet8i sign_flip_mask = _mm256_setr_m128(hi, lo); #endif // Create a mask for which interpolant to use, i.e. if z > 1, then the mask @@ -149,7 +144,7 @@ plog(const Packet8f& _x) { #else __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 0), 23); __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 1), 23); - Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_setr_m128(lo, hi)); + Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_set_m128(hi,lo)); #endif Packet8f e = _mm256_sub_ps(emm0, p8f_126f); @@ -264,7 +259,7 @@ pexp(const Packet8f& _x) { #else __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 0), 23); __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(emm0, 1), 23); - emm0 = _mm256_setr_m128(lo, hi); + emm0 = _mm256_set_m128(hi,lo); #endif // Return 2^m * exp(r). From 75a7fa1919af749ba79a2b70c542320707837f61 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 18 Dec 2015 14:07:31 -0800 Subject: [PATCH 342/344] Doubled the speed of full reductions on GPUs. 
--- .../Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 49102fca2..af1b9432c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -87,15 +87,15 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num } typename Self::CoeffReturnType accum = reducer.initialize(); - for (Index i = 0; i < NumPerThread; ++i) { - const Index index = first_index + i * BlockSize; - if (index >= num_coeffs) { - break; - } + Index max_iter = numext::mini(num_coeffs - first_index, NumPerThread*BlockSize); + for (Index i = 0; i < max_iter; i+=BlockSize) { + const Index index = first_index + i; + eigen_assert(index < num_coeffs); typename Self::CoeffReturnType val = input.m_impl.coeff(index); reducer.reduce(val, &accum); } +#pragma unroll for (int offset = warpSize/2; offset > 0; offset /= 2) { reducer.reduce(__shfl_down(accum, offset), &accum); } From 1b829695598a823fe3d9132d35ccdbb6e176c47e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 18 Dec 2015 14:36:35 -0800 Subject: [PATCH 343/344] Add alignment requirement for local buffer used by the slicing op. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index bdc86e0fa..d8c923d74 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -443,7 +443,7 @@ struct TensorEvaluator, Devi return rslt; } else { - typename internal::remove_const::type values[packetSize]; + EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; values[0] = m_impl.coeff(inputIndices[0]); values[packetSize-1] = m_impl.coeff(inputIndices[1]); for (int i = 1; i < packetSize-1; ++i) { From 6d777e1bc7d31023ad78c84777847896ab31927d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 18 Dec 2015 19:25:50 -0800 Subject: [PATCH 344/344] Fixed a typo. --- Eigen/src/Core/arch/AVX/MathFunctions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 9ced9b717..7baf57eca 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -58,7 +58,7 @@ psin(const Packet8f& _x) { _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 0), 31); __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 1), 31); - Packet8i sign_flip_mask = _mm256_setr_m128(hi, lo); + Packet8i sign_flip_mask = _mm256_set_m128(hi, lo); #endif // Create a mask for which interpolant to use, i.e. if z > 1, then the mask