From c569cfe12ae6b6bf246e915f0b03ca983c9f225c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 11 Feb 2016 09:33:32 -0800 Subject: [PATCH 01/13] Inline the +=, -=, *= and /= operators consistently between DenseBase.h and SelfCwiseBinaryOp.h --- Eigen/src/Core/SelfCwiseBinaryOp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 38185d9d7..78fff1549 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -13,7 +13,7 @@ namespace Eigen { template -inline Derived& DenseBase::operator*=(const Scalar& other) +EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); @@ -21,7 +21,7 @@ inline Derived& DenseBase::operator*=(const Scalar& other) } template -inline Derived& ArrayBase::operator+=(const Scalar& other) +EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); @@ -29,7 +29,7 @@ inline Derived& ArrayBase::operator+=(const Scalar& other) } template -inline Derived& ArrayBase::operator-=(const Scalar& other) +EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); @@ -37,7 +37,7 @@ inline Derived& ArrayBase::operator-=(const Scalar& other) } template -inline Derived& DenseBase::operator/=(const Scalar& other) +EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); From eeac46f98012ba4a69060f8d3bc365e04f1edaa7 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Thu, 11 Feb 2016 19:38:37 +0100 Subject: [PATCH 02/13] bug #774: re-added comment referencing equations in the original paper --- Eigen/src/Geometry/Umeyama.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h index 6943f719e..7e933fca1 100644 --- a/Eigen/src/Geometry/Umeyama.h +++ b/Eigen/src/Geometry/Umeyama.h @@ -139,6 +139,7 @@ umeyama(const MatrixBase& src, const MatrixBase& dst, boo if ( svd.matrixU().determinant() * svd.matrixV().determinant() < 0 ) S(m-1) = -1; + // Eq. (40) and (43) Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose(); if (with_scaling) From 3628f7655d5063c4a7e67c6efc9e4ba10c31892c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 11 Feb 2016 15:05:03 -0800 Subject: [PATCH 03/13] Made it possible to run the scalar_binary_pow_op functor on GPU --- Eigen/src/Core/MathFunctions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index e87b60f8f..447f1b834 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -496,7 +496,7 @@ template struct pow_default_impl { typedef Scalar retval; - static inline Scalar run(const Scalar& x, const Scalar& y) + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { EIGEN_USING_STD_MATH(pow); return pow(x, y); @@ -506,7 +506,7 @@ struct pow_default_impl template struct pow_default_impl { - static inline Scalar run(Scalar x, Scalar y) + static EIGEN_DEVICE_FUNC inline Scalar run(Scalar x, Scalar y) { Scalar res(1); eigen_assert(!NumTraits::IsSigned || y >= 0); From de345eff2e7e41505224e04c47e2a91b020b5a5a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 11 Feb 2016 16:34:07 -0800 Subject: [PATCH 04/13] Added a method to conjugate the content of a tensor or the result of a tensor expression. --- .../Eigen/CXX11/src/Tensor/TensorBase.h | 6 ++++++ unsupported/test/cxx11_tensor_of_complex.cpp | 20 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index cca716d6f..4dea1d3a0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -170,6 +170,12 @@ class TensorBase return unaryExpr(internal::scalar_abs_op()); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + conjugate() const { + return unaryExpr(internal::scalar_conjugate_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> pow(Scalar exponent) const { diff --git a/unsupported/test/cxx11_tensor_of_complex.cpp b/unsupported/test/cxx11_tensor_of_complex.cpp index 8ad04f699..25e51143e 100644 --- a/unsupported/test/cxx11_tensor_of_complex.cpp +++ b/unsupported/test/cxx11_tensor_of_complex.cpp @@ -48,6 +48,25 @@ static void test_abs() } +static void test_conjugate() +{ + Tensor, 1> data1(3); + Tensor, 1> data2(3); + Tensor data3(3); + data1.setRandom(); + data2.setRandom(); + data3.setRandom(); + + Tensor, 1> conj1 = data1.conjugate(); + Tensor, 1> conj2 = data2.conjugate(); + Tensor conj3 = data3.conjugate(); + for (int i = 0; i < 3; ++i) { + VERIFY_IS_APPROX(conj1(i), std::conj(data1(i))); + VERIFY_IS_APPROX(conj2(i), std::conj(data2(i))); + VERIFY_IS_APPROX(conj3(i), data3(i)); + } +} + static void test_contractions() { Tensor, 4> t_left(30, 50, 8, 31); @@ -77,5 +96,6 @@ void test_cxx11_tensor_of_complex() { CALL_SUBTEST(test_additions()); CALL_SUBTEST(test_abs()); + CALL_SUBTEST(test_conjugate()); CALL_SUBTEST(test_contractions()); } From 9e3f3a2d272d6efa6845cd560da1a5546f93ff61 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 11 Feb 2016 17:27:35 -0800 Subject: [PATCH 05/13] Deleted outdated comment --- unsupported/test/cxx11_tensor_cuda.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 60f9314a5..58da21d3b 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -7,8 +7,6 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -// TODO(mdevin): Free the cuda memory. - #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_cuda From b35d1a122ec2702cb5e6a262b6d34b3098f998b3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 12 Feb 2016 15:31:16 +0100 Subject: [PATCH 06/13] Fix unit test: accessing elements in a deque by offsetting a pointer to another element causes undefined behavior. --- test/stddeque_overload.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/stddeque_overload.cpp b/test/stddeque_overload.cpp index d887e35ba..4da618bbf 100644 --- a/test/stddeque_overload.cpp +++ b/test/stddeque_overload.cpp @@ -48,7 +48,6 @@ void check_stddeque_matrix(const MatrixType& m) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(MatrixType)); // do a lot of push_back such that the deque gets internally resized // (with memory reallocation) From 0a537cb2d87ada8206ec2271fb9f2904a18ccfce Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 12 Feb 2016 15:58:31 +0100 Subject: [PATCH 07/13] bug #901: fix triangular-view with unit diagonal of sparse rectangular matrices. --- Eigen/src/SparseCore/SparseTriangularView.h | 21 ++++++++++++--------- test/sparse_basic.cpp | 5 ++--- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h index 7c718e4e1..2c6aedaf9 100644 --- a/Eigen/src/SparseCore/SparseTriangularView.h +++ b/Eigen/src/SparseCore/SparseTriangularView.h @@ -70,20 +70,20 @@ class TriangularViewImpl::InnerIterator : public MatrixT public: EIGEN_STRONG_INLINE InnerIterator(const TriangularViewImpl& view, Index outer) - : Base(view.derived().nestedExpression(), outer), m_returnOne(false) + : Base(view.derived().nestedExpression(), outer), m_returnOne(false), m_containsDiag(Base::outer()index()<=outer : this->index()=Base::outer())) { if((!SkipFirst) && Base::operator bool()) Base::operator++(); - m_returnOne = true; + m_returnOne = m_containsDiag; } } @@ -98,7 +98,7 @@ class TriangularViewImpl::InnerIterator : public MatrixT { if((!SkipFirst) && Base::operator bool()) Base::operator++(); - m_returnOne = true; + m_returnOne = m_containsDiag; } } return *this; @@ -130,6 +130,7 @@ class TriangularViewImpl::InnerIterator : public MatrixT } protected: bool m_returnOne; + bool m_containsDiag; }; template @@ -193,7 +194,7 @@ public: Flags = XprType::Flags }; - explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()) {} + explicit unary_evaluator(const XprType &xpr) : m_argImpl(xpr.nestedExpression()), m_arg(xpr.nestedExpression()) {} inline Index nonZerosEstimate() const { return m_argImpl.nonZerosEstimate(); @@ -205,20 +206,20 @@ public: public: EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& xprEval, Index outer) - : Base(xprEval.m_argImpl,outer), m_returnOne(false) + : Base(xprEval.m_argImpl,outer), m_returnOne(false), m_containsDiag(Base::outer()index()<=outer : this->index()=Base::outer())) { if((!SkipFirst) && Base::operator bool()) Base::operator++(); - m_returnOne = true; // FIXME check innerSize()>outer(); + m_returnOne = m_containsDiag; } } @@ -233,7 +234,7 @@ public: { if((!SkipFirst) && Base::operator bool()) Base::operator++(); - m_returnOne = true; // FIXME check innerSize()>outer(); + m_returnOne = m_containsDiag; } } return *this; @@ -266,12 +267,14 @@ public: protected: bool m_returnOne; + bool m_containsDiag; private: Scalar& valueRef(); }; protected: evaluator m_argImpl; + const ArgType& m_arg; }; } // end namespace internal diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 0a06c828b..cb8ebaedf 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -21,8 +21,8 @@ template void sparse_basic(const SparseMatrixType& re const Index rows = ref.rows(); const Index cols = ref.cols(); - const Index inner = ref.innerSize(); - const Index outer = ref.outerSize(); + //const Index inner = ref.innerSize(); + //const Index outer = ref.outerSize(); typedef typename SparseMatrixType::Scalar Scalar; enum { Flags = SparseMatrixType::Flags }; @@ -327,7 +327,6 @@ template void sparse_basic(const SparseMatrixType& re m3 = m2.template triangularView(); VERIFY_IS_APPROX(m3, refMat3); - if(inner>=outer) // FIXME this should be implemented for outer>inner as well { refMat3 = refMat2.template triangularView(); m3 = m2.template triangularView(); From 2f5f56a8207d61c890ae47c05ad7e1ec2ac94dbb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 12 Feb 2016 16:13:16 +0100 Subject: [PATCH 08/13] Fix usage of evaluator in sparse * permutation products. --- Eigen/src/SparseCore/SparseSelfAdjointView.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index 402733cce..b92bb17e2 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -387,7 +387,10 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix Dest; typedef Matrix VectorI; + typedef evaluator MatEval; + typedef typename evaluator::InnerIterator MatIterator; + MatEval matEval(mat); Dest& dest(_dest.derived()); enum { StorageOrderMatch = int(Dest::IsRowMajor) == int(MatrixType::IsRowMajor) @@ -401,7 +404,7 @@ void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix(it.index()); Index r = it.row(); @@ -474,12 +477,17 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrix& dest(_dest.derived()); typedef Matrix VectorI; + typedef evaluator MatEval; + typedef typename evaluator::InnerIterator MatIterator; + enum { SrcOrder = MatrixType::IsRowMajor ? RowMajor : ColMajor, StorageOrderMatch = int(SrcOrder) == int(DstOrder), DstMode = DstOrder==RowMajor ? (_DstMode==Upper ? Lower : Upper) : _DstMode, SrcMode = SrcOrder==RowMajor ? (_SrcMode==Upper ? Lower : Upper) : _SrcMode }; + + MatEval matEval(mat); Index size = mat.rows(); VectorI count(size); @@ -488,7 +496,7 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrixj)) @@ -508,7 +516,7 @@ void permute_symm_to_symm(const MatrixType& mat, SparseMatrixj)) From 4252af6897a2eb0f0bd725ef77f6cb2a979104ca Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 12 Feb 2016 16:13:35 +0100 Subject: [PATCH 09/13] Remove dead code. --- Eigen/src/SparseCore/SparseTriangularView.h | 106 -------------------- 1 file changed, 106 deletions(-) diff --git a/Eigen/src/SparseCore/SparseTriangularView.h b/Eigen/src/SparseCore/SparseTriangularView.h index 2c6aedaf9..0c27855d5 100644 --- a/Eigen/src/SparseCore/SparseTriangularView.h +++ b/Eigen/src/SparseCore/SparseTriangularView.h @@ -43,9 +43,6 @@ template class TriangularViewImpl::type MatrixTypeNestedNonRef; typedef typename internal::remove_all::type MatrixTypeNestedCleaned; @@ -63,109 +60,6 @@ template class TriangularViewImpl -class TriangularViewImpl::InnerIterator : public MatrixTypeNestedCleaned::InnerIterator -{ - typedef typename MatrixTypeNestedCleaned::InnerIterator Base; - public: - - EIGEN_STRONG_INLINE InnerIterator(const TriangularViewImpl& view, Index outer) - : Base(view.derived().nestedExpression(), outer), m_returnOne(false), m_containsDiag(Base::outer()index()<=outer : this->index()=Base::outer())) - { - if((!SkipFirst) && Base::operator bool()) - Base::operator++(); - m_returnOne = m_containsDiag; - } - } - - EIGEN_STRONG_INLINE InnerIterator& operator++() - { - if(HasUnitDiag && m_returnOne) - m_returnOne = false; - else - { - Base::operator++(); - if(HasUnitDiag && (!SkipFirst) && ((!Base::operator bool()) || Base::index()>=Base::outer())) - { - if((!SkipFirst) && Base::operator bool()) - Base::operator++(); - m_returnOne = m_containsDiag; - } - } - return *this; - } - - inline Index row() const { return (MatrixType::Flags&RowMajorBit ? Base::outer() : this->index()); } - inline Index col() const { return (MatrixType::Flags&RowMajorBit ? this->index() : Base::outer()); } - inline StorageIndex index() const - { - if(HasUnitDiag && m_returnOne) return Base::outer(); - else return Base::index(); - } - inline Scalar value() const - { - if(HasUnitDiag && m_returnOne) return Scalar(1); - else return Base::value(); - } - - EIGEN_STRONG_INLINE operator bool() const - { - if(HasUnitDiag && m_returnOne) - return true; - if(SkipFirst) return Base::operator bool(); - else - { - if (SkipDiag) return (Base::operator bool() && this->index() < this->outer()); - else return (Base::operator bool() && this->index() <= this->outer()); - } - } - protected: - bool m_returnOne; - bool m_containsDiag; -}; - -template -class TriangularViewImpl::ReverseInnerIterator : public MatrixTypeNestedCleaned::ReverseInnerIterator -{ - typedef typename MatrixTypeNestedCleaned::ReverseInnerIterator Base; - public: - - EIGEN_STRONG_INLINE ReverseInnerIterator(const TriangularViewType& view, Index outer) - : Base(view.derived().nestedExpression(), outer) - { - eigen_assert((!HasUnitDiag) && "ReverseInnerIterator does not support yet triangular views with a unit diagonal"); - if(SkipLast) { - while((*this) && (SkipDiag ? this->index()>=outer : this->index()>outer)) - --(*this); - } - } - - EIGEN_STRONG_INLINE ReverseInnerIterator& operator--() - { Base::operator--(); return *this; } - - inline Index row() const { return Base::row(); } - inline Index col() const { return Base::col(); } - - EIGEN_STRONG_INLINE operator bool() const - { - if (SkipLast) return Base::operator bool() ; - else - { - if(SkipDiag) return (Base::operator bool() && this->index() > this->outer()); - else return (Base::operator bool() && this->index() >= this->outer()); - } - } -}; - namespace internal { template From 6eff3e51852b5d15e5c21997f3bdf4ba3122696b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 12 Feb 2016 17:09:28 +0100 Subject: [PATCH 10/13] Fix triangularView versus triangularPart. --- doc/TemplateKeyword.dox | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/TemplateKeyword.dox b/doc/TemplateKeyword.dox index e06aba7ba..b84cfdae9 100644 --- a/doc/TemplateKeyword.dox +++ b/doc/TemplateKeyword.dox @@ -73,13 +73,13 @@ for operator<". The reason that the \c template keyword is necessary in the last example has to do with the rules for how templates are supposed to be compiled in C++. The compiler has to check the code for correct syntax at the point where the template is defined, without knowing the actual value of the template arguments (\c Derived1 -and \c Derived2 in the example). That means that the compiler cannot know that dst.triangularPart is +and \c Derived2 in the example). That means that the compiler cannot know that dst.triangularView is a member template and that the following < symbol is part of the delimiter for the template -parameter. Another possibility would be that dst.triangularPart is a member variable with the < +parameter. Another possibility would be that dst.triangularView is a member variable with the < symbol refering to the operator<() function. In fact, the compiler should choose the second -possibility, according to the standard. If dst.triangularPart is a member template (as in our case), +possibility, according to the standard. If dst.triangularView is a member template (as in our case), the programmer should specify this explicitly with the \c template keyword and write dst.template -triangularPart. +triangularView. The precise rules are rather complicated, but ignoring some subtleties we can summarize them as follows: - A dependent name is name that depends (directly or indirectly) on a template parameter. In the From c8b4c4b48a41a1744c9ad7a888e2bcad23250904 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 12 Feb 2016 22:09:16 +0100 Subject: [PATCH 11/13] bug #795: mention allocate_shared as a condidate for aligned_allocator. --- doc/UnalignedArrayAssert.dox | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/UnalignedArrayAssert.dox b/doc/UnalignedArrayAssert.dox index 8c97d7874..65ab16fb7 100644 --- a/doc/UnalignedArrayAssert.dox +++ b/doc/UnalignedArrayAssert.dox @@ -7,8 +7,8 @@ Hello! You are seeing this webpage because your program terminated on an asserti my_program: path/to/eigen/Eigen/src/Core/DenseStorage.h:44: Eigen::internal::matrix_array::internal::matrix_array() [with T = double, int Size = 2, int MatrixOptions = 2, bool Align = true]: -Assertion `(reinterpret_cast(array) & 0xf) == 0 && "this assertion -is explained here: http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html +Assertion `(reinterpret_cast(array) & (sizemask)) == 0 && "this assertion +is explained here: http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html **** READ THIS WEB PAGE !!! ****"' failed. @@ -46,9 +46,9 @@ then you need to read this separate page: \ref TopicStructHavingEigenMembers "St Note that here, Eigen::Vector2d is only used as an example, more generally the issue arises for all \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types". -\section c2 Cause 2: STL Containers +\section c2 Cause 2: STL Containers or manual memory allocation -If you use STL Containers such as std::vector, std::map, ..., with Eigen objects, or with classes containing Eigen objects, like this, +If you use STL Containers such as std::vector, std::map, ..., with %Eigen objects, or with classes containing %Eigen objects, like this, \code std::vector my_vector; @@ -60,6 +60,8 @@ then you need to read this separate page: \ref TopicStlContainers "Using STL Con Note that here, Eigen::Matrix2f is only used as an example, more generally the issue arises for all \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types" and \ref TopicStructHavingEigenMembers "structures having such Eigen objects as member". +The same issue will be exhibited by any classes/functions by-passing operator new to allocate memory, that is, by performing custom memory allocation followed by calls to the placement new operator. This is for instance typically the case of \c std::make_shared or \c std::allocate_shared for which is the solution is to use an \ref aligned_allocator "aligned allocator" as detailed in the \ref TopicStlContainers "solution for STL containers". + \section c3 Cause 3: Passing Eigen objects by value If some function in your code is getting an Eigen object passed by value, like this, From 8e1f1ba6a6cf0580da6f8756562f94b6410d5e58 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 12 Feb 2016 22:16:59 +0100 Subject: [PATCH 12/13] Import wiki's paragraph: "I disabled vectorization, but I'm still getting annoyed about alignment issues" --- doc/UnalignedArrayAssert.dox | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/UnalignedArrayAssert.dox b/doc/UnalignedArrayAssert.dox index 65ab16fb7..f0f84d25f 100644 --- a/doc/UnalignedArrayAssert.dox +++ b/doc/UnalignedArrayAssert.dox @@ -109,7 +109,10 @@ Two possibilities: 128-bit alignment code and thus preserves ABI compatibility, but completely disables vectorization. -For more information, see this FAQ. +If you want to know why defining EIGEN_DONT_VECTORIZE does not by itself disable 128-bit alignment and the assertion, here's the explanation: + +It doesn't disable the assertion, because otherwise code that runs fine without vectorization would suddenly crash when enabling vectorization. +It doesn't disable 128bit alignment, because that would mean that vectorized and non-vectorized code are not mutually ABI-compatible. This ABI compatibility is very important, even for people who develop only an in-house application, as for instance one may want to have in the same application a vectorized path and a non-vectorized path. */ From f6f057bb7d3fcd24b751cba2e70d416f4a803e1f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 15 Feb 2016 21:43:07 +0100 Subject: [PATCH 13/13] bug #1166: fix shortcomming in gemv when the destination is not a vector at compile-time. --- Eigen/src/Core/GeneralProduct.h | 11 +++++++---- test/product.h | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 0769a212e..53f934999 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -213,15 +213,18 @@ template<> struct gemv_dense_selector ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) * RhsBlasTraits::extractScalarFactor(rhs); + // make sure Dest is a compile-time vector type (bug 1166) + typedef typename conditional::type ActualDest; + enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 // on, the other hand it is good for the cache to pack the vector anyways... - EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1, + EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), - MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal + MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal }; - gemv_static_vector_if static_dest; + gemv_static_vector_if static_dest; const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; @@ -314,7 +317,7 @@ template<> struct gemv_dense_selector actualLhs.rows(), actualLhs.cols(), LhsMapper(actualLhs.data(), actualLhs.outerStride()), RhsMapper(actualRhsPtr, 1), - dest.data(), dest.innerStride(), + dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166) actualAlpha); } }; diff --git a/test/product.h b/test/product.h index bd92309d2..45bb64958 100644 --- a/test/product.h +++ b/test/product.h @@ -144,6 +144,22 @@ template void product(const MatrixType& m) VERIFY_IS_APPROX(res.col(r).noalias() = square.adjoint() * square.col(r), (square.adjoint() * square.col(r)).eval()); VERIFY_IS_APPROX(res.col(r).noalias() = square * square.col(r), (square * square.col(r)).eval()); + // vector at runtime (see bug 1166) + { + RowSquareMatrixType ref(square); + ColSquareMatrixType ref2(square2); + ref = res = square; + VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.col(0).transpose() * square.transpose(), (ref.row(0) = m1.col(0).transpose() * square.transpose())); + VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.block(0,0,rows,1).transpose() * square.transpose(), (ref.row(0) = m1.col(0).transpose() * square.transpose())); + VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.col(0).transpose() * square, (ref.row(0) = m1.col(0).transpose() * square)); + VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.block(0,0,rows,1).transpose() * square, (ref.row(0) = m1.col(0).transpose() * square)); + ref2 = res2 = square2; + VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.row(0) * square2.transpose(), (ref2.row(0) = m1.row(0) * square2.transpose())); + VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.block(0,0,1,cols) * square2.transpose(), (ref2.row(0) = m1.row(0) * square2.transpose())); + VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.row(0) * square2, (ref2.row(0) = m1.row(0) * square2)); + VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.block(0,0,1,cols) * square2, (ref2.row(0) = m1.row(0) * square2)); + } + // inner product { Scalar x = square2.row(c) * square2.col(c2);