diff --git a/CMakeLists.txt b/CMakeLists.txt index eefaf4b47..4016de370 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,11 +98,18 @@ if(CMAKE_COMPILER_IS_GNUCXX) message("Enabling SSE4.2 in tests/examples") endif() - option(EIGEN_TEST_ALTIVEC "Enable/Disable altivec in tests/examples" OFF) + option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF) if(EIGEN_TEST_ALTIVEC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec") message("Enabling AltiVec in tests/examples") endif() + + option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) + if(EIGEN_TEST_NEON) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon -mcpu=cortex-a8") + message("Enabling NEON in tests/examples") + endif() + endif(CMAKE_COMPILER_IS_GNUCXX) if(MSVC) diff --git a/Eigen/Core b/Eigen/Core index 26195cd35..075f95e5a 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2007-2009 Benoit Jacob +// Copyright (C) 2007-2010 Benoit Jacob // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -61,20 +61,45 @@ #ifndef EIGEN_DONT_VECTORIZE #if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) + + // Defines symbols for compile-time detection of which instructions are + // used. + // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_SSE - #include - #include + #define EIGEN_VECTORIZE_SSE2 + + // Detect sse3/ssse3/sse4: + // gcc and icc defines __SSE3__, .., + // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you + // want to force the use of those instructions with msvc. #ifdef __SSE3__ - #include + #define EIGEN_VECTORIZE_SSE3 #endif #ifdef __SSSE3__ - #include + #define EIGEN_VECTORIZE_SSSE3 #endif #ifdef __SSE4_1__ - #include + #define EIGEN_VECTORIZE_SSE4_1 #endif #ifdef __SSE4_2__ + #define EIGEN_VECTORIZE_SSE4_2 + #endif + + // include files + + #include + #include + #ifdef EIGEN_VECTORIZE_SSE3 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSSE3 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSE4_1 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSE4_2 #include #endif #elif defined __ALTIVEC__ @@ -86,6 +111,10 @@ #undef bool #undef vector #undef pixel + #elif defined __ARM_NEON__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_NEON + #include #endif #endif @@ -97,18 +126,24 @@ #include #endif +#include #include #include #include #include #include -#include +#include #include #include #include // for min/max: #include +// for outputting debug info +#ifdef EIGEN_DEBUG_ASSIGN +#include +#endif + #if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_NO_EXCEPTIONS) #define EIGEN_EXCEPTIONS #endif @@ -129,6 +164,26 @@ namespace Eigen { +inline static const char *SimdInstructionSetsInUse(void) { +#if defined(EIGEN_VECTORIZE_SSE4_2) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_1) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; +#elif defined(EIGEN_VECTORIZE_SSSE3) + return "SSE, SSE2, SSE3, SSSE3"; +#elif defined(EIGEN_VECTORIZE_SSE3) + return "SSE, SSE2, SSE3"; +#elif defined(EIGEN_VECTORIZE_SSE2) + return "SSE, SSE2"; +#elif defined(EIGEN_VECTORIZE_ALTIVEC) + return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_NEON) + return "ARM NEON"; +#else + return "None"; +#endif +} + // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to // ensure QNX/QCC support using std::size_t; @@ -163,11 +218,11 @@ struct Dense {}; #include "src/Core/arch/SSE/MathFunctions.h" #elif defined EIGEN_VECTORIZE_ALTIVEC #include "src/Core/arch/AltiVec/PacketMath.h" +#elif defined EIGEN_VECTORIZE_NEON + #include "src/Core/arch/NEON/PacketMath.h" #endif -#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD -#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 -#endif +#include "src/Core/arch/Default/Settings.h" #include "src/Core/Functors.h" #include "src/Core/DenseBase.h" @@ -196,6 +251,7 @@ struct Dense {}; #include "src/Core/Dot.h" #include "src/Core/StableNorm.h" #include "src/Core/MapBase.h" +#include "src/Core/Stride.h" #include "src/Core/Map.h" #include "src/Core/Block.h" #include "src/Core/VectorBlock.h" @@ -231,28 +287,6 @@ struct Dense {}; #include "src/Core/products/TriangularSolverMatrix.h" #include "src/Core/BandMatrix.h" -/** \defgroup Array_Module Array module - * \ingroup Core_Module - * This module provides several handy features to manipulate matrices as simple array of values. - * In addition to listed classes, it defines various methods of the Cwise interface - * (accessible from MatrixBase::cwise()), including: - * - matrix-scalar sum, - * - coeff-wise comparison operators, - * - sin, cos, sqrt, pow, exp, log, square, cube, inverse (reciprocal). - * - * This module also provides various MatrixBase methods, including: - * - boolean reductions: \ref MatrixBase::all() "all", \ref MatrixBase::any() "any", \ref MatrixBase::count() "count", - * - \ref MatrixBase::Random() "random matrix initialization", - * - a \ref MatrixBase::select() "select" function mimicking the trivariate ?: operator, - * - \ref MatrixBase::colwise() "column-wise" and \ref MatrixBase::rowwise() "row-wise" reductions, - * - \ref MatrixBase::reverse() "matrix reverse", - * - \ref MatrixBase::lpNorm() "generic matrix norm". - * - * \code - * #include - * \endcode - */ - #include "src/Array/Functors.h" #include "src/Array/BooleanRedux.h" #include "src/Array/Select.h" diff --git a/Eigen/Eigen2Support b/Eigen/Eigen2Support index bd6306aff..26b547b9b 100644 --- a/Eigen/Eigen2Support +++ b/Eigen/Eigen2Support @@ -26,7 +26,7 @@ #define EIGEN2SUPPORT_H #if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H)) -#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before any other Eigen header +#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header #endif #include "src/Core/util/DisableMSVCWarnings.h" @@ -36,6 +36,7 @@ namespace Eigen { /** \defgroup Eigen2Support_Module Eigen2 support module * This module provides a couple of deprecated functions improving the compatibility with Eigen2. * + * To use it, define EIGEN2_SUPPORT before including any Eigen header * \code * #define EIGEN2_SUPPORT * \endcode @@ -51,4 +52,7 @@ namespace Eigen { #include "src/Core/util/EnableMSVCWarnings.h" +// Eigen2 used to include iostream +#include + #endif // EIGEN2SUPPORT_H diff --git a/Eigen/src/Array/Array.h b/Eigen/src/Array/Array.h index 5a398d849..91a091152 100644 --- a/Eigen/src/Array/Array.h +++ b/Eigen/src/Array/Array.h @@ -213,6 +213,9 @@ class Array void swap(ArrayBase EIGEN_REF_TO_TEMPORARY other) { this->_swap(other.derived()); } + inline int innerStride() const { return 1; } + inline int outerStride() const { return this->innerSize(); } + #ifdef EIGEN_ARRAY_PLUGIN #include EIGEN_ARRAY_PLUGIN #endif diff --git a/Eigen/src/Array/ArrayWrapper.h b/Eigen/src/Array/ArrayWrapper.h index 75bc33770..0075dd537 100644 --- a/Eigen/src/Array/ArrayWrapper.h +++ b/Eigen/src/Array/ArrayWrapper.h @@ -55,7 +55,8 @@ class ArrayWrapper : public ArrayBase > inline int rows() const { return m_expression.rows(); } inline int cols() const { return m_expression.cols(); } - inline int stride() const { return m_expression.stride(); } + inline int outerStride() const { return m_expression.outerStride(); } + inline int innerStride() const { return m_expression.innerStride(); } inline const CoeffReturnType coeff(int row, int col) const { @@ -139,7 +140,8 @@ class MatrixWrapper : public MatrixBase > inline int rows() const { return m_expression.rows(); } inline int cols() const { return m_expression.cols(); } - inline int stride() const { return m_expression.stride(); } + inline int outerStride() const { return m_expression.outerStride(); } + inline int innerStride() const { return m_expression.innerStride(); } inline const CoeffReturnType coeff(int row, int col) const { diff --git a/Eigen/src/Array/Reverse.h b/Eigen/src/Array/Reverse.h index a405fbb4b..07b9f77b7 100644 --- a/Eigen/src/Array/Reverse.h +++ b/Eigen/src/Array/Reverse.h @@ -81,11 +81,11 @@ template class Reverse typedef typename MatrixType::template MakeBase< Reverse >::Type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) + using Base::IsRowMajor; protected: enum { PacketSize = ei_packet_traits::size, - IsRowMajor = Flags & RowMajorBit, IsColMajor = !IsRowMajor, ReverseRow = (Direction == Vertical) || (Direction == BothDirections), ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index 4d3149d42..8699fe7e0 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -62,14 +62,21 @@ template class LDLT typedef Matrix IntColVectorType; typedef Matrix IntRowVectorType; - /** - * \brief Default Constructor. - * - * The default constructor is useful in cases in which the user intends to - * perform decompositions via LDLT::compute(const MatrixType&). - */ + /** \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via LDLT::compute(const MatrixType&). + */ LDLT() : m_matrix(), m_p(), m_transpositions(), m_isInitialized(false) {} + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa LDLT() + */ + LDLT(int size) : m_matrix(size,size), m_p(size), m_transpositions(size), m_isInitialized(false) {} + LDLT(const MatrixType& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_p(matrix.rows()), @@ -148,6 +155,8 @@ template class LDLT return m_matrix; } + MatrixType reconstructedMatrix() const; + inline int rows() const { return m_matrix.rows(); } inline int cols() const { return m_matrix.cols(); } @@ -175,6 +184,10 @@ LDLT& LDLT::compute(const MatrixType& a) m_matrix = a; + m_p.resize(size); + m_transpositions.resize(size); + m_isInitialized = false; + if (size <= 1) { m_p.setZero(); m_transpositions.setZero(); @@ -202,11 +215,8 @@ LDLT& LDLT::compute(const MatrixType& a) { // The biggest overall is the point of reference to which further diagonals // are compared; if any diagonal is negligible compared - // to the largest overall, the algorithm bails. This cutoff is suggested - // in "Analysis of the Cholesky Decomposition of a Semi-definite Matrix" by - // Nicholas J. Higham. Also see "Accuracy and Stability of Numerical - // Algorithms" page 217, also by Higham. - cutoff = ei_abs(NumTraits::epsilon() * RealScalar(size) * biggest_in_corner); + // to the largest overall, the algorithm bails. + cutoff = ei_abs(NumTraits::epsilon() * biggest_in_corner); m_sign = ei_real(m_matrix.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1; } @@ -231,26 +241,21 @@ LDLT& LDLT::compute(const MatrixType& a) continue; } - RealScalar Djj = ei_real(m_matrix.coeff(j,j) - m_matrix.row(j).head(j) - .dot(m_matrix.col(j).head(j))); + RealScalar Djj = ei_real(m_matrix.coeff(j,j) - m_matrix.row(j).head(j).dot(m_matrix.col(j).head(j))); m_matrix.coeffRef(j,j) = Djj; - // Finish early if the matrix is not full rank. - if(ei_abs(Djj) < cutoff) - { - for(int i = j; i < size; i++) m_transpositions.coeffRef(i) = i; - break; - } - int endSize = size - j - 1; if (endSize > 0) { _temporary.tail(endSize).noalias() = m_matrix.block(j+1,0, endSize, j) * m_matrix.col(j).head(j).conjugate(); m_matrix.row(j).tail(endSize) = m_matrix.row(j).tail(endSize).conjugate() - - _temporary.tail(endSize).transpose(); + - _temporary.tail(endSize).transpose(); - m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj; + if(ei_abs(Djj) > cutoff) + { + m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj; + } } } @@ -315,6 +320,31 @@ bool LDLT::solveInPlace(MatrixBase &bAndX) const return true; } +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: P^T L D L^* P. + * This function is provided for debug purpose. */ +template +MatrixType LDLT::reconstructedMatrix() const +{ + ei_assert(m_isInitialized && "LDLT is not initialized."); + const int size = m_matrix.rows(); + MatrixType res(size,size); + res.setIdentity(); + + // PI + for(int i = 0; i < size; ++i) res.row(m_transpositions.coeff(i)).swap(res.row(i)); + // L^* P + res = matrixL().adjoint() * res; + // D(L^*P) + res = vectorD().asDiagonal() * res; + // L(DL^*P) + res = matrixL() * res; + // P^T (LDL^*P) + for (int i = size-1; i >= 0; --i) res.row(m_transpositions.coeff(i)).swap(res.row(i)); + + return res; +} + /** \cholesky_module * \returns the Cholesky decomposition with full pivoting without square root of \c *this */ diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index 474b82406..2e8df7661 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -117,7 +117,7 @@ template class LLT && "LLT::solve(): invalid number of rows of the right hand side matrix b"); return ei_solve_retval(*this, b.derived()); } - + template bool solveInPlace(MatrixBase &bAndX) const; @@ -133,6 +133,8 @@ template class LLT return m_matrix; } + MatrixType reconstructedMatrix() const; + inline int rows() const { return m_matrix.rows(); } inline int cols() const { return m_matrix.cols(); } @@ -295,6 +297,16 @@ bool LLT::solveInPlace(MatrixBase &bAndX) const return true; } +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: L L^*. + * This function is provided for debug purpose. */ +template +MatrixType LLT::reconstructedMatrix() const +{ + ei_assert(m_isInitialized && "LLT is not initialized."); + return matrixL() * matrixL().adjoint().toDenseMatrix(); +} + /** \cholesky_module * \returns the LLT decomposition of \c *this */ diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 9440cebf1..eb7bca1da 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2007 Michael Olbrich -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2006-2010 Benoit Jacob // Copyright (C) 2008 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or @@ -37,34 +37,35 @@ struct ei_assign_traits public: enum { DstIsAligned = Derived::Flags & AlignedBit, + DstHasDirectAccess = Derived::Flags & DirectAccessBit, SrcIsAligned = OtherDerived::Flags & AlignedBit, - SrcAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned + JointAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned }; private: enum { - InnerSize = int(Derived::Flags)&RowMajorBit - ? Derived::ColsAtCompileTime - : Derived::RowsAtCompileTime, - InnerMaxSize = int(Derived::Flags)&RowMajorBit - ? Derived::MaxColsAtCompileTime - : Derived::MaxRowsAtCompileTime, - MaxSizeAtCompileTime = ei_size_at_compile_time::ret, + InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) + : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) + : int(Derived::RowsAtCompileTime), + InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime) + : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime) + : int(Derived::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Derived::SizeAtCompileTime, PacketSize = ei_packet_traits::size }; enum { - StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit), + StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), MightVectorize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 && int(DstIsAligned) && int(SrcIsAligned), MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), - MayLinearVectorize = MightVectorize && MayLinearize - && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), + MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess + && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. */ - MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize + MaySliceVectorize = MightVectorize && DstHasDirectAccess && int(InnerMaxSize)>=3*PacketSize /* slice vectorization can be slow, so we only want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block in a fixed-size matrix */ @@ -104,16 +105,18 @@ public: : int(NoUnrolling) }; +#ifdef EIGEN_DEBUG_ASSIGN static void debug() { EIGEN_DEBUG_VAR(DstIsAligned) EIGEN_DEBUG_VAR(SrcIsAligned) - EIGEN_DEBUG_VAR(SrcAlignment) + EIGEN_DEBUG_VAR(JointAlignment) EIGEN_DEBUG_VAR(InnerSize) EIGEN_DEBUG_VAR(InnerMaxSize) EIGEN_DEBUG_VAR(PacketSize) EIGEN_DEBUG_VAR(StorageOrdersAgree) EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearize) EIGEN_DEBUG_VAR(MayInnerVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MaySliceVectorize) @@ -123,6 +126,7 @@ public: EIGEN_DEBUG_VAR(MayUnrollInner) EIGEN_DEBUG_VAR(Unrolling) } +#endif }; /*************************************************************************** @@ -137,17 +141,13 @@ template struct ei_assign_DefaultTraversal_CompleteUnrolling { enum { - row = int(Derived1::Flags)&RowMajorBit - ? Index / int(Derived1::ColsAtCompileTime) - : Index % Derived1::RowsAtCompileTime, - col = int(Derived1::Flags)&RowMajorBit - ? Index % int(Derived1::ColsAtCompileTime) - : Index / Derived1::RowsAtCompileTime + outer = Index / Derived1::InnerSizeAtCompileTime, + inner = Index % Derived1::InnerSizeAtCompileTime }; EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) { - dst.copyCoeff(row, col, src); + dst.copyCoeffByOuterInner(outer, inner, src); ei_assign_DefaultTraversal_CompleteUnrolling::run(dst, src); } }; @@ -161,13 +161,10 @@ struct ei_assign_DefaultTraversal_CompleteUnrolling struct ei_assign_DefaultTraversal_InnerUnrolling { - EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col) + EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer) { - const bool rowMajor = int(Derived1::Flags)&RowMajorBit; - const int row = rowMajor ? row_or_col : Index; - const int col = rowMajor ? Index : row_or_col; - dst.copyCoeff(row, col, src); - ei_assign_DefaultTraversal_InnerUnrolling::run(dst, src, row_or_col); + dst.copyCoeffByOuterInner(outer, Index, src); + ei_assign_DefaultTraversal_InnerUnrolling::run(dst, src, outer); } }; @@ -205,18 +202,14 @@ template struct ei_assign_innervec_CompleteUnrolling { enum { - row = int(Derived1::Flags)&RowMajorBit - ? Index / int(Derived1::ColsAtCompileTime) - : Index % Derived1::RowsAtCompileTime, - col = int(Derived1::Flags)&RowMajorBit - ? Index % int(Derived1::ColsAtCompileTime) - : Index / Derived1::RowsAtCompileTime, - SrcAlignment = ei_assign_traits::SrcAlignment + outer = Index / Derived1::InnerSizeAtCompileTime, + inner = Index % Derived1::InnerSizeAtCompileTime, + JointAlignment = ei_assign_traits::JointAlignment }; EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) { - dst.template copyPacket(row, col, src); + dst.template copyPacketByOuterInner(outer, inner, src); ei_assign_innervec_CompleteUnrolling::size, Stop>::run(dst, src); } @@ -231,13 +224,11 @@ struct ei_assign_innervec_CompleteUnrolling template struct ei_assign_innervec_InnerUnrolling { - EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col) + EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int outer) { - const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index; - const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col; - dst.template copyPacket(row, col, src); + dst.template copyPacketByOuterInner(outer, Index, src); ei_assign_innervec_InnerUnrolling::size, Stop>::run(dst, src, row_or_col); + Index+ei_packet_traits::size, Stop>::run(dst, src, outer); } }; @@ -267,14 +258,9 @@ struct ei_assign_impl { const int innerSize = dst.innerSize(); const int outerSize = dst.outerSize(); - for(int j = 0; j < outerSize; ++j) - for(int i = 0; i < innerSize; ++i) - { - if(int(Derived1::Flags)&RowMajorBit) - dst.copyCoeff(j, i, src); - else - dst.copyCoeff(i, j, src); - } + for(int outer = 0; outer < outerSize; ++outer) + for(int inner = 0; inner < innerSize; ++inner) + dst.copyCoeffByOuterInner(outer, inner, src); } }; @@ -293,12 +279,10 @@ struct ei_assign_impl { EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) { - const bool rowMajor = int(Derived1::Flags)&RowMajorBit; - const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime; const int outerSize = dst.outerSize(); - for(int j = 0; j < outerSize; ++j) - ei_assign_DefaultTraversal_InnerUnrolling - ::run(dst, src, j); + for(int outer = 0; outer < outerSize; ++outer) + ei_assign_DefaultTraversal_InnerUnrolling + ::run(dst, src, outer); } }; @@ -339,14 +323,9 @@ struct ei_assign_impl const int innerSize = dst.innerSize(); const int outerSize = dst.outerSize(); const int packetSize = ei_packet_traits::size; - for(int j = 0; j < outerSize; ++j) - for(int i = 0; i < innerSize; i+=packetSize) - { - if(int(Derived1::Flags)&RowMajorBit) - dst.template copyPacket(j, i, src); - else - dst.template copyPacket(i, j, src); - } + for(int outer = 0; outer < outerSize; ++outer) + for(int inner = 0; inner < innerSize; inner+=packetSize) + dst.template copyPacketByOuterInner(outer, inner, src); } }; @@ -365,12 +344,10 @@ struct ei_assign_impl - ::run(dst, src, j); + for(int outer = 0; outer < outerSize; ++outer) + ei_assign_innervec_InnerUnrolling + ::run(dst, src, outer); } }; @@ -406,7 +383,7 @@ struct ei_unaligned_assign_impl template struct ei_assign_impl { - inline static void run(Derived1 &dst, const Derived2 &src) + EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src) { const int size = dst.size(); const int packetSize = ei_packet_traits::size; @@ -418,7 +395,7 @@ struct ei_assign_impl::SrcAlignment>(index, src); + dst.template copyPacket::JointAlignment>(index, src); } ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size); @@ -452,40 +429,24 @@ struct ei_assign_impl const int packetAlignedMask = packetSize - 1; const int innerSize = dst.innerSize(); const int outerSize = dst.outerSize(); - const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask; + const int alignedStep = (packetSize - dst.outerStride() % packetSize) & packetAlignedMask; int alignedStart = ei_assign_traits::DstIsAligned ? 0 : ei_first_aligned(&dst.coeffRef(0,0), innerSize); - for(int i = 0; i < outerSize; ++i) + for(int outer = 0; outer < outerSize; ++outer) { const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); - // do the non-vectorizable part of the assignment - for (int index = 0; index(i, index, src); - else - dst.template copyPacket(index, i, src); - } + for(int inner = alignedStart; inner(outer, inner, src); // do the non-vectorizable part of the assignment - for (int index = alignedEnd; index((alignedStart+alignedStep)%packetSize, innerSize); } diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 3b4234c22..e6cfb0859 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2006-2010 Benoit Jacob // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -100,8 +100,8 @@ template=0) && ( ((BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) && i= 0 && BlockRows >= 1 && startRow + BlockRows <= matrix.rows() @@ -196,8 +196,8 @@ template && startCol >= 0 && blockCols >= 0 && startCol + blockCols <= matrix.cols()); } - /** \sa MapBase::stride() */ - inline int stride() const + /** \sa MapBase::innerStride() */ + inline int innerStride() const { - return ((!Base::IsVectorAtCompileTime) - || (BlockRows==1 && ((Flags&RowMajorBit)==0)) - || (BlockCols==1 && ((Flags&RowMajorBit)==RowMajorBit))) - ? m_matrix.stride() : 1; + return RowsAtCompileTime==1 ? m_matrix.colStride() + : ColsAtCompileTime==1 ? m_matrix.rowStride() + : m_matrix.innerStride(); + } + + /** \sa MapBase::outerStride() */ + inline int outerStride() const + { + return IsVectorAtCompileTime ? this->size() : m_matrix.outerStride(); } #ifndef __SUNPRO_CC // FIXME sunstudio is not friendly with the above friend... + // META-FIXME there is no 'friend' keyword around here. Is this obsolete? protected: #endif diff --git a/Eigen/src/Core/Coeffs.h b/Eigen/src/Core/Coeffs.h index ebfd0c80e..da7b9153f 100644 --- a/Eigen/src/Core/Coeffs.h +++ b/Eigen/src/Core/Coeffs.h @@ -25,6 +25,24 @@ #ifndef EIGEN_COEFFS_H #define EIGEN_COEFFS_H +template +EIGEN_STRONG_INLINE int DenseBase::rowIndexByOuterInner(int outer, int inner) +{ + return int(Derived::RowsAtCompileTime) == 1 ? 0 + : int(Derived::ColsAtCompileTime) == 1 ? inner + : int(Derived::Flags)&RowMajorBit ? outer + : inner; +} + +template +EIGEN_STRONG_INLINE int DenseBase::colIndexByOuterInner(int outer, int inner) +{ + return int(Derived::ColsAtCompileTime) == 1 ? 0 + : int(Derived::RowsAtCompileTime) == 1 ? inner + : int(Derived::Flags)&RowMajorBit ? inner + : outer; +} + /** Short version: don't use this function, use * \link operator()(int,int) const \endlink instead. * @@ -48,6 +66,14 @@ EIGEN_STRONG_INLINE const typename DenseBase::CoeffReturnType DenseBase return derived().coeff(row, col); } +template +EIGEN_STRONG_INLINE const typename DenseBase::CoeffReturnType DenseBase + ::coeffByOuterInner(int outer, int inner) const +{ + return coeff(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); +} + /** \returns the coefficient at given the given row and column. * * \sa operator()(int,int), operator[](int) const @@ -84,6 +110,14 @@ EIGEN_STRONG_INLINE typename ei_traits::Scalar& DenseBase return derived().coeffRef(row, col); } +template +EIGEN_STRONG_INLINE typename ei_traits::Scalar& DenseBase + ::coeffRefByOuterInner(int outer, int inner) +{ + return coeffRef(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); +} + /** \returns a reference to the coefficient at given the given row and column. * * \sa operator()(int,int) const, operator[](int) @@ -261,6 +295,15 @@ DenseBase::packet(int row, int col) const return derived().template packet(row,col); } +template +template +EIGEN_STRONG_INLINE typename ei_packet_traits::Scalar>::type +DenseBase::packetByOuterInner(int outer, int inner) const +{ + return packet(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); +} + /** Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility * to ensure that a packet really starts there. This method is only available on expressions having the * PacketAccessBit. @@ -279,6 +322,16 @@ EIGEN_STRONG_INLINE void DenseBase::writePacket derived().template writePacket(row,col,x); } +template +template +EIGEN_STRONG_INLINE void DenseBase::writePacketByOuterInner +(int outer, int inner, const typename ei_packet_traits::Scalar>::type& x) +{ + writePacket(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner), + x); +} + /** \returns the packet of coefficients starting at the given index. It is your responsibility * to ensure that a packet really starts there. This method is only available on expressions having the * PacketAccessBit and the LinearAccessBit. @@ -346,6 +399,16 @@ EIGEN_STRONG_INLINE void DenseBase::copyCoeff(int index, const DenseBas derived().coeffRef(index) = other.derived().coeff(index); } +template +template +EIGEN_STRONG_INLINE void DenseBase::copyCoeffByOuterInner(int outer, int inner, const DenseBase& other) +{ + const int row = Derived::rowIndexByOuterInner(outer,inner); + const int col = Derived::colIndexByOuterInner(outer,inner); + // derived() is important here: copyCoeff() may be reimplemented in Derived! + derived().copyCoeff(row, col, other); +} + /** \internal Copies the packet at position (row,col) of other into *this. * * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code @@ -379,6 +442,16 @@ EIGEN_STRONG_INLINE void DenseBase::copyPacket(int index, const DenseBa other.derived().template packet(index)); } +template +template +EIGEN_STRONG_INLINE void DenseBase::copyPacketByOuterInner(int outer, int inner, const DenseBase& other) +{ + const int row = Derived::rowIndexByOuterInner(outer,inner); + const int col = Derived::colIndexByOuterInner(outer,inner); + // derived() is important here: copyCoeff() may be reimplemented in Derived! + derived().copyPacket(row, col, other); +} + template struct ei_first_aligned_impl { diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index eb018738f..67540bd8c 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -124,7 +124,14 @@ template class DenseBase * constructed from this one. See the \ref flags "list of flags". */ - IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression is row major. */ + IsRowMajor = RowsAtCompileTime==1 ? 1 + : ColsAtCompileTime==1 ? 0 + : int(Flags) & RowMajorBit, /**< True if this expression has row-major effective addressing. + For non-vectors, it is like reading the RowMajorBit on the Flags. For vectors, this is + overriden by the convention that row-vectors are row-major and column-vectors are column-major. */ + + InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? SizeAtCompileTime + : int(Flags)&RowMajorBit ? ColsAtCompileTime : RowsAtCompileTime, CoeffReadCost = ei_traits::CoeffReadCost, /**< This is a rough measure of how expensive it is to read one coefficient from @@ -160,31 +167,98 @@ template class DenseBase * In other words, this function returns * \code rows()==1 || cols()==1 \endcode * \sa rows(), cols(), IsVectorAtCompileTime. */ - inline bool isVector() const { return rows()==1 || cols()==1; } - /** \returns the size of the storage major dimension, - * i.e., the number of columns for a columns major matrix, and the number of rows otherwise */ - int outerSize() const { return (int(Flags)&RowMajorBit) ? this->rows() : this->cols(); } - /** \returns the size of the inner dimension according to the storage order, - * i.e., the number of rows for a columns major matrix, and the number of cols otherwise */ - int innerSize() const { return (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); } - /** Only plain matrices, not expressions may be resized; therefore the only useful resize method is - * Matrix::resize(). The present method only asserts that the new size equals the old size, and does + /** \returns the outer size. + * + * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension + * with respect to the storage order, i.e., the number of columns for a column-major matrix, + * and the number of rows for a row-major matrix. */ + int outerSize() const + { + return IsVectorAtCompileTime ? 1 + : (int(Flags)&RowMajorBit) ? this->rows() : this->cols(); + } + + /** \returns the inner size. + * + * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension + * with respect to the storage order, i.e., the number of rows for a column-major matrix, + * and the number of columns for a row-major matrix. */ + int innerSize() const + { + return IsVectorAtCompileTime ? this->size() + : (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); + } + + /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are + * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does * nothing else. */ void resize(int size) { ei_assert(size == this->size() - && "MatrixBase::resize() does not actually allow to resize."); + && "DenseBase::resize() does not actually allow to resize."); } - /** Only plain matrices, not expressions may be resized; therefore the only useful resize method is - * Matrix::resize(). The present method only asserts that the new size equals the old size, and does + /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are + * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does * nothing else. */ void resize(int rows, int cols) { ei_assert(rows == this->rows() && cols == this->cols() - && "MatrixBase::resize() does not actually allow to resize."); + && "DenseBase::resize() does not actually allow to resize."); + } + + /** \returns the pointer increment between two consecutive elements. + * + * \note For vectors, the storage order is ignored. For matrices (non-vectors), we're looking + * at the increment between two consecutive elements within a slice in the inner direction. + * + * \sa outerStride(), rowStride(), colStride() + */ + inline int innerStride() const + { + EIGEN_STATIC_ASSERT(int(Flags)&DirectAccessBit, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) + return derived().innerStride(); + } + + /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns + * in a column-major matrix). + * + * \note For vectors, the storage order is ignored, there is only one inner slice, and so this method returns 1. + * For matrices (non-vectors), the notion of inner slice depends on the storage order. + * + * \sa innerStride(), rowStride(), colStride() + */ + inline int outerStride() const + { + EIGEN_STATIC_ASSERT(int(Flags)&DirectAccessBit, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) + return derived().outerStride(); + } + + inline int stride() const + { + return IsVectorAtCompileTime ? innerStride() : outerStride(); + } + + /** \returns the pointer increment between two consecutive rows. + * + * \sa innerStride(), outerStride(), colStride() + */ + inline int rowStride() const + { + return IsRowMajor ? outerStride() : innerStride(); + } + + /** \returns the pointer increment between two consecutive columns. + * + * \sa innerStride(), outerStride(), rowStride() + */ + inline int colStride() const + { + return IsRowMajor ? innerStride() : outerStride(); } #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -242,9 +316,11 @@ template class DenseBase CommaInitializer operator<< (const DenseBase& other); const CoeffReturnType coeff(int row, int col) const; + const CoeffReturnType coeffByOuterInner(int outer, int inner) const; const CoeffReturnType operator()(int row, int col) const; Scalar& coeffRef(int row, int col); + Scalar& coeffRefByOuterInner(int outer, int inner); Scalar& operator()(int row, int col); const CoeffReturnType coeff(int index) const; @@ -259,17 +335,30 @@ template class DenseBase template void copyCoeff(int row, int col, const DenseBase& other); template + void copyCoeffByOuterInner(int outer, int inner, const DenseBase& other); + template void copyCoeff(int index, const DenseBase& other); template void copyPacket(int row, int col, const DenseBase& other); template + void copyPacketByOuterInner(int outer, int inner, const DenseBase& other); + template void copyPacket(int index, const DenseBase& other); + + private: + static int rowIndexByOuterInner(int outer, int inner); + static int colIndexByOuterInner(int outer, int inner); + public: #endif // not EIGEN_PARSED_BY_DOXYGEN template PacketScalar packet(int row, int col) const; + template + PacketScalar packetByOuterInner(int outer, int inner) const; template void writePacket(int row, int col, const PacketScalar& x); + template + void writePacketByOuterInner(int outer, int inner, const PacketScalar& x); template PacketScalar packet(int index) const; @@ -409,13 +498,6 @@ template class DenseBase template void swap(DenseBase EIGEN_REF_TO_TEMPORARY other); - /** \returns number of elements to skip to pass from one row (resp. column) to another - * for a row-major (resp. column-major) matrix. - * Combined with coeffRef() and the \ref flags flags, it allows a direct access to the data - * of the underlying matrix. - */ - inline int stride() const { return derived().stride(); } - inline const NestByValue nestByValue() const; inline const ForceAlignedAccess forceAlignedAccess() const; inline ForceAlignedAccess forceAlignedAccess(); diff --git a/Eigen/src/Core/DenseStorageBase.h b/Eigen/src/Core/DenseStorageBase.h index 6245b8007..dac2142a4 100644 --- a/Eigen/src/Core/DenseStorageBase.h +++ b/Eigen/src/Core/DenseStorageBase.h @@ -75,23 +75,6 @@ class DenseStorageBase : public _Base EIGEN_STRONG_INLINE int rows() const { return m_storage.rows(); } EIGEN_STRONG_INLINE int cols() const { return m_storage.cols(); } - /** Returns the leading dimension (for matrices) or the increment (for vectors) to be used with data(). - * - * More precisely: - * - for a column major matrix it returns the number of elements between two successive columns - * - for a row major matrix it returns the number of elements between two successive rows - * - for a vector it returns the number of elements between two successive coefficients - * This function has to be used together with the MapBase::data() function. - * - * \sa data() */ - EIGEN_STRONG_INLINE int stride() const - { - if(IsVectorAtCompileTime) - return 1; - else - return (Flags & RowMajorBit) ? m_storage.cols() : m_storage.rows(); - } - EIGEN_STRONG_INLINE const Scalar& coeff(int row, int col) const { if(Flags & RowMajorBit) @@ -253,12 +236,12 @@ class DenseStorageBase : public _Base { if(RowsAtCompileTime == 1) { - ei_assert(other.isVector()); + ei_assert(other.rows() == 1 || other.cols() == 1); resize(1, other.size()); } else if(ColsAtCompileTime == 1) { - ei_assert(other.isVector()); + ei_assert(other.rows() == 1 || other.cols() == 1); resize(other.size(), 1); } else resize(other.rows(), other.cols()); @@ -379,6 +362,9 @@ class DenseStorageBase : public _Base * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned * \a data pointers. * + * These methods do not allow to specify strides. If you need to specify strides, you have to + * use the Map class directly. + * * \see class Map */ //@{ @@ -544,11 +530,21 @@ struct ei_conservative_resize_like_impl { if (_this.rows() == rows && _this.cols() == cols) return; EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) - typename Derived::PlainObject tmp(rows,cols); - const int common_rows = std::min(rows, _this.rows()); - const int common_cols = std::min(cols, _this.cols()); - tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); - _this.derived().swap(tmp); + + if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows + (!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns + { + _this.derived().m_storage.conservativeResize(rows*cols,rows,cols); + } + else + { + // The storage order does not allow us to use reallocation. + typename Derived::PlainObject tmp(rows,cols); + const int common_rows = std::min(rows, _this.rows()); + const int common_cols = std::min(cols, _this.cols()); + tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); + _this.derived().swap(tmp); + } } static void run(DenseBase& _this, const DenseBase& other) @@ -563,11 +559,26 @@ struct ei_conservative_resize_like_impl EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived) - typename Derived::PlainObject tmp(other); - const int common_rows = std::min(tmp.rows(), _this.rows()); - const int common_cols = std::min(tmp.cols(), _this.cols()); - tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); - _this.derived().swap(tmp); + if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) || // row-major and we change only the number of rows + (!Derived::IsRowMajor && _this.rows() == other.rows()) ) // column-major and we change only the number of columns + { + const int new_rows = other.rows() - _this.rows(); + const int new_cols = other.cols() - _this.cols(); + _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols()); + if (new_rows>0) + _this.corner(BottomRight, new_rows, other.cols()) = other.corner(BottomRight, new_rows, other.cols()); + else if (new_cols>0) + _this.corner(BottomRight, other.rows(), new_cols) = other.corner(BottomRight, other.rows(), new_cols); + } + else + { + // The storage order does not allow us to use reallocation. + typename Derived::PlainObject tmp(other); + const int common_rows = std::min(tmp.rows(), _this.rows()); + const int common_cols = std::min(tmp.cols(), _this.cols()); + tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); + _this.derived().swap(tmp); + } } }; @@ -576,22 +587,23 @@ struct ei_conservative_resize_like_impl { static void run(DenseBase& _this, int size) { - if (_this.size() == size) return; - typename Derived::PlainObject tmp(size); - const int common_size = std::min(_this.size(),size); - tmp.segment(0,common_size) = _this.segment(0,common_size); - _this.derived().swap(tmp); + const int new_rows = Derived::RowsAtCompileTime==1 ? 1 : size; + const int new_cols = Derived::RowsAtCompileTime==1 ? size : 1; + _this.derived().m_storage.conservativeResize(size,new_rows,new_cols); } static void run(DenseBase& _this, const DenseBase& other) { if (_this.rows() == other.rows() && _this.cols() == other.cols()) return; - // segment(...) will check whether Derived/OtherDerived are vectors! - typename Derived::PlainObject tmp(other); - const int common_size = std::min(_this.size(),tmp.size()); - tmp.segment(0,common_size) = _this.segment(0,common_size); - _this.derived().swap(tmp); + const int num_new_elements = other.size() - _this.size(); + + const int new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows(); + const int new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1; + _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols); + + if (num_new_elements > 0) + _this.tail(num_new_elements) = other.tail(num_new_elements); } }; diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 201bd23ca..fbdc67bd3 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2006-2008, 2010 Benoit Jacob // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -25,224 +25,35 @@ #ifndef EIGEN_DOT_H #define EIGEN_DOT_H -/*************************************************************************** -* Part 1 : the logic deciding a strategy for vectorization and unrolling -***************************************************************************/ - -template -struct ei_dot_traits -{ -public: - enum { - Traversal = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit) - && (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit) - ? LinearVectorizedTraversal - : DefaultTraversal - }; - -private: - typedef typename Derived1::Scalar Scalar; - enum { - PacketSize = ei_packet_traits::size, - Cost = Derived1::SizeAtCompileTime * (Derived1::CoeffReadCost + Derived2::CoeffReadCost + NumTraits::MulCost) - + (Derived1::SizeAtCompileTime-1) * NumTraits::AddCost, - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) - }; - -public: - enum { - Unrolling = Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling - }; -}; - -/*************************************************************************** -* Part 2 : unrollers -***************************************************************************/ - -/*** no vectorization ***/ - -template -struct ei_dot_novec_unroller -{ - enum { - HalfLength = Length/2 - }; - - typedef typename Derived1::Scalar Scalar; - - inline static Scalar run(const Derived1& v1, const Derived2& v2) - { - return ei_dot_novec_unroller::run(v1, v2) - + ei_dot_novec_unroller::run(v1, v2); - } -}; - -template -struct ei_dot_novec_unroller -{ - typedef typename Derived1::Scalar Scalar; - - inline static Scalar run(const Derived1& v1, const Derived2& v2) - { - return ei_conj(v1.coeff(Start)) * v2.coeff(Start); - } -}; - -/*** vectorization ***/ - -template::size)> -struct ei_dot_vec_unroller -{ - typedef typename Derived1::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - enum { - row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index, - col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0, - row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index, - col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0 - }; - - inline static PacketScalar run(const Derived1& v1, const Derived2& v2) - { - return ei_pmadd( - v1.template packet(row1, col1), - v2.template packet(row2, col2), - ei_dot_vec_unroller::size, Stop>::run(v1, v2) - ); - } -}; - -template -struct ei_dot_vec_unroller -{ - enum { - row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index, - col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0, - row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index, - col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0, - alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned, - alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned - }; - - typedef typename Derived1::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - inline static PacketScalar run(const Derived1& v1, const Derived2& v2) - { - return ei_pmul(v1.template packet(row1, col1), v2.template packet(row2, col2)); - } -}; - -/*************************************************************************** -* Part 3 : implementation of all cases -***************************************************************************/ - -template::Traversal, - int Unrolling = ei_dot_traits::Unrolling +// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot +// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE +// looking at the static assertions. Thus this is a trick to get better compile errors. +template -struct ei_dot_impl; - -template -struct ei_dot_impl +struct ei_dot_nocheck { - typedef typename Derived1::Scalar Scalar; - static Scalar run(const Derived1& v1, const Derived2& v2) + static inline typename ei_traits::Scalar run(const MatrixBase& a, const MatrixBase& b) { - ei_assert(v1.size()>0 && "you are using a non initialized vector"); - Scalar res; - res = ei_conj(v1.coeff(0)) * v2.coeff(0); - for(int i = 1; i < v1.size(); ++i) - res += ei_conj(v1.coeff(i)) * v2.coeff(i); - return res; + return a.conjugate().cwiseProduct(b).sum(); } }; -template -struct ei_dot_impl - : public ei_dot_novec_unroller -{}; - -template -struct ei_dot_impl +template +struct ei_dot_nocheck { - typedef typename Derived1::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - static Scalar run(const Derived1& v1, const Derived2& v2) + static inline typename ei_traits::Scalar run(const MatrixBase& a, const MatrixBase& b) { - const int size = v1.size(); - const int packetSize = ei_packet_traits::size; - const int alignedSize = (size/packetSize)*packetSize; - enum { - alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned, - alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned - }; - Scalar res; - - // do the vectorizable part of the sum - if(size >= packetSize) - { - PacketScalar packet_res = ei_pmul( - v1.template packet(0), - v2.template packet(0) - ); - for(int index = packetSize; index(index), - v2.template packet(index), - packet_res - ); - } - res = ei_predux(packet_res); - - // now we must do the rest without vectorization. - if(alignedSize == size) return res; - } - else // too small to vectorize anything. - // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. - { - res = Scalar(0); - } - - // do the remainder of the vector - for(int index = alignedSize; index < size; ++index) - { - res += v1.coeff(index) * v2.coeff(index); - } - - return res; + return a.adjoint().cwiseProduct(b).sum(); } }; -template -struct ei_dot_impl -{ - typedef typename Derived1::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - enum { - PacketSize = ei_packet_traits::size, - Size = Derived1::SizeAtCompileTime, - VectorizedSize = (Size / PacketSize) * PacketSize - }; - static Scalar run(const Derived1& v1, const Derived2& v2) - { - Scalar res = ei_predux(ei_dot_vec_unroller::run(v1, v2)); - if (VectorizedSize != Size) - res += ei_dot_novec_unroller::run(v1, v2); - return res; - } -}; - -/*************************************************************************** -* Part 4 : implementation of MatrixBase methods -***************************************************************************/ - /** \returns the dot product of *this with other. * * \only_for_vectors @@ -266,10 +77,7 @@ MatrixBase::dot(const MatrixBase& other) const ei_assert(size() == other.size()); - // dot() must honor EvalBeforeNestingBit (eg: v.dot(M*v) ) - typedef typename ei_cleantype::type ThisNested; - typedef typename ei_cleantype::type OtherNested; - return ei_dot_impl::run(derived(), other.derived()); + return ei_dot_nocheck::run(*this, other); } /** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself. diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index 0044fe7cb..9413b74fa 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -60,7 +60,8 @@ template clas inline int rows() const { return m_matrix.rows(); } inline int cols() const { return m_matrix.cols(); } - inline int stride() const { return m_matrix.stride(); } + inline int outerStride() const { return m_matrix.outerStride(); } + inline int innerStride() const { return m_matrix.innerStride(); } inline const Scalar coeff(int row, int col) const { diff --git a/Eigen/src/Core/ForceAlignedAccess.h b/Eigen/src/Core/ForceAlignedAccess.h index 927f43413..300b22329 100644 --- a/Eigen/src/Core/ForceAlignedAccess.h +++ b/Eigen/src/Core/ForceAlignedAccess.h @@ -52,7 +52,8 @@ template class ForceAlignedAccess inline int rows() const { return m_expression.rows(); } inline int cols() const { return m_expression.cols(); } - inline int stride() const { return m_expression.stride(); } + inline int outerStride() const { return m_expression.outerStride(); } + inline int innerStride() const { return m_expression.innerStride(); } inline const CoeffReturnType coeff(int row, int col) const { diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index 31d0cff70..c2b317cc0 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -179,7 +179,7 @@ struct ei_functor_traits > { enum { Cost = 2 * NumTraits::MulCost, PacketAccess = ei_packet_traits::size>1 - #if (defined EIGEN_VECTORIZE_SSE) + #if (defined EIGEN_VECTORIZE) && NumTraits::HasFloatingPoint #endif }; diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index 83688dbca..6e9a5439e 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2007-2010 Benoit Jacob // Copyright (C) 2008 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or @@ -33,10 +33,35 @@ * \param MatrixType the equivalent matrix type of the mapped data * \param Options specifies whether the pointer is \c Aligned, or \c Unaligned. * The default is \c Unaligned. + * \param StrideType optionnally specifies strides. By default, Map assumes the memory layout + * of an ordinary, contiguous array. This can be overridden by specifying strides. + * The type passed here must be a specialization of the Stride template, see examples below. * * This class represents a matrix or vector expression mapping an existing array of data. * It can be used to let Eigen interface without any overhead with non-Eigen data structures, - * such as plain C arrays or structures from other libraries. + * such as plain C arrays or structures from other libraries. By default, it assumes that the + * data is laid out contiguously in memory. You can however override this by explicitly specifying + * inner and outer strides. + * + * Here's an example of simply mapping a contiguous array as a column-major matrix: + * \include Map_simple.cpp + * Output: \verbinclude Map_simple.out + * + * If you need to map non-contiguous arrays, you can do so by specifying strides: + * + * Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer + * increment between two consecutive coefficients. Here, we're specifying the inner stride as a compile-time + * fixed value. + * \include Map_inner_stride.cpp + * Output: \verbinclude Map_inner_stride.out + * + * Here's an example of mapping an array while specifying an outer stride. Here, since we're mapping + * as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns. + * Here, we're specifying the outer stride as a runtime parameter. + * \include Map_outer_stride.cpp + * Output: \verbinclude Map_outer_stride.out + * + * For more details and for an example of specifying both an inner and an outer stride, see class Stride. * * \b Tip: to change the array of data mapped by a Map object, you can use the C++ * placement new syntax: @@ -48,48 +73,86 @@ * * \sa Matrix::Map() */ -template -struct ei_traits > : public ei_traits +template +struct ei_traits > + : public ei_traits { + typedef typename MatrixType::Scalar Scalar; enum { - Flags = (Options&Aligned)==Aligned ? ei_traits::Flags | AlignedBit - : ei_traits::Flags & ~AlignedBit + InnerStride = StrideType::InnerStrideAtCompileTime, + OuterStride = StrideType::OuterStrideAtCompileTime, + HasNoInnerStride = InnerStride <= 1, + HasNoOuterStride = OuterStride == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsAligned = int(int(Options)&Aligned)==Aligned, + IsDynamicSize = MatrixType::SizeAtCompileTime==Dynamic, + KeepsPacketAccess = bool(HasNoInnerStride) + && ( bool(IsDynamicSize) + || HasNoOuterStride + || ( OuterStride!=Dynamic && ((int(OuterStride)*sizeof(Scalar))%16)==0 ) ), + Flags0 = ei_traits::Flags, + Flags1 = IsAligned ? int(Flags0) | AlignedBit : int(Flags0) & ~AlignedBit, + Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit), + Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) }; }; -template class Map - : public MapBase, - typename MatrixType::template MakeBase< Map >::Type> +template class Map + : public MapBase, + typename MatrixType::template MakeBase< + Map + >::Type> { public: typedef MapBase::Type> Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Map) - inline int stride() const { return this->innerSize(); } - - inline Map(const Scalar* data) : Base(data) {} - - inline Map(const Scalar* data, int size) : Base(data, size) {} - - inline Map(const Scalar* data, int rows, int cols) : Base(data, rows, cols) {} - - inline void resize(int rows, int cols) + inline int innerStride() const { - EIGEN_ONLY_USED_FOR_DEBUG(rows); - EIGEN_ONLY_USED_FOR_DEBUG(cols); - ei_assert(rows == this->rows()); - ei_assert(cols == this->cols()); + return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } - inline void resize(int size) + inline int outerStride() const { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(MatrixType) - EIGEN_ONLY_USED_FOR_DEBUG(size); - ei_assert(size == this->size()); + return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() + : IsVectorAtCompileTime ? this->size() + : int(Flags)&RowMajorBit ? this->cols() + : this->rows(); } + /** Constructor in the fixed-size case. + * + * \param data pointer to the array to map + * \param stride optional Stride object, passing the strides. + */ + inline Map(const Scalar* data, const StrideType& stride = StrideType()) + : Base(data), m_stride(stride) {} + + /** Constructor in the dynamic-size vector case. + * + * \param data pointer to the array to map + * \param size the size of the vector expression + * \param stride optional Stride object, passing the strides. + */ + inline Map(const Scalar* data, int size, const StrideType& stride = StrideType()) + : Base(data, size), m_stride(stride) {} + + /** Constructor in the dynamic-size matrix case. + * + * \param data pointer to the array to map + * \param rows the number of rows of the matrix expression + * \param cols the number of columns of the matrix expression + * \param stride optional Stride object, passing the strides. + */ + inline Map(const Scalar* data, int rows, int cols, const StrideType& stride = StrideType()) + : Base(data, rows, cols), m_stride(stride) {} + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) + + protected: + StrideType m_stride; }; template diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index a922d8bb0..d735cfc47 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2007-2010 Benoit Jacob // Copyright (C) 2008 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or @@ -37,9 +37,7 @@ template class MapBase { public: -// typedef MatrixBase Base; enum { - IsRowMajor = (int(ei_traits::Flags) & RowMajorBit) ? 1 : 0, RowsAtCompileTime = ei_traits::RowsAtCompileTime, ColsAtCompileTime = ei_traits::ColsAtCompileTime, SizeAtCompileTime = Base::SizeAtCompileTime @@ -48,94 +46,75 @@ template class MapBase typedef typename ei_traits::Scalar Scalar; typedef typename Base::PacketScalar PacketScalar; using Base::derived; + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::colStride; inline int rows() const { return m_rows.value(); } inline int cols() const { return m_cols.value(); } - /** Returns the leading dimension (for matrices) or the increment (for vectors) to be used with data(). - * - * More precisely: - * - for a column major matrix it returns the number of elements between two successive columns - * - for a row major matrix it returns the number of elements between two successive rows - * - for a vector it returns the number of elements between two successive coefficients - * This function has to be used together with the MapBase::data() function. - * - * \sa MapBase::data() */ - inline int stride() const { return derived().stride(); } - /** Returns a pointer to the first coefficient of the matrix or vector. - * This function has to be used together with the stride() function. * - * \sa MapBase::stride() */ + * \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride(). + * + * \sa innerStride(), outerStride() + */ inline const Scalar* data() const { return m_data; } inline const Scalar& coeff(int row, int col) const { - if(IsRowMajor) - return m_data[col + row * stride()]; - else // column-major - return m_data[row + col * stride()]; + return m_data[col * colStride() + row * rowStride()]; } inline Scalar& coeffRef(int row, int col) { - if(IsRowMajor) - return const_cast(m_data)[col + row * stride()]; - else // column-major - return const_cast(m_data)[row + col * stride()]; + return const_cast(m_data)[col * colStride() + row * rowStride()]; } inline const Scalar& coeff(int index) const { ei_assert(Derived::IsVectorAtCompileTime || (ei_traits::Flags & LinearAccessBit)); - if ( ((RowsAtCompileTime == 1) == IsRowMajor) || !int(Derived::IsVectorAtCompileTime) ) - return m_data[index]; - else - return m_data[index*stride()]; + return m_data[index * innerStride()]; } inline Scalar& coeffRef(int index) { ei_assert(Derived::IsVectorAtCompileTime || (ei_traits::Flags & LinearAccessBit)); - if ( ((RowsAtCompileTime == 1) == IsRowMajor) || !int(Derived::IsVectorAtCompileTime) ) - return const_cast(m_data)[index]; - else - return const_cast(m_data)[index*stride()]; + return const_cast(m_data)[index * innerStride()]; } template inline PacketScalar packet(int row, int col) const { return ei_ploadt - (m_data + (IsRowMajor ? col + row * stride() - : row + col * stride())); + (m_data + (col * colStride() + row * rowStride())); } template inline PacketScalar packet(int index) const { - return ei_ploadt(m_data + index); + return ei_ploadt(m_data + index * innerStride()); } template inline void writePacket(int row, int col, const PacketScalar& x) { ei_pstoret - (const_cast(m_data) + (IsRowMajor ? col + row * stride() - : row + col * stride()), x); + (const_cast(m_data) + (col * colStride() + row * rowStride()), x); } template inline void writePacket(int index, const PacketScalar& x) { ei_pstoret - (const_cast(m_data) + index, x); + (const_cast(m_data) + index * innerStride(), x); } inline MapBase(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - checkDataAlignment(); + checkSanity(); } inline MapBase(const Scalar* data, int size) @@ -146,7 +125,7 @@ template class MapBase EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) ei_assert(size >= 0); ei_assert(data == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == size); - checkDataAlignment(); + checkSanity(); } inline MapBase(const Scalar* data, int rows, int cols) @@ -155,7 +134,7 @@ template class MapBase ei_assert( (data == 0) || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); - checkDataAlignment(); + checkSanity(); } Derived& operator=(const MapBase& other) @@ -167,10 +146,12 @@ template class MapBase protected: - void checkDataAlignment() const + void checkSanity() const { ei_assert( ((!(ei_traits::Flags&AlignedBit)) || ((size_t(m_data)&0xf)==0)) && "data is not aligned"); + ei_assert( ((!(ei_traits::Flags&PacketAccessBit)) + || (innerStride()==1)) && "packet access incompatible with inner stride greater than 1"); } const Scalar* EIGEN_RESTRICT m_data; diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index 44a0ef7d1..e7422457c 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2006-2010 Benoit Jacob // Copyright (C) 2008-2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or @@ -318,6 +318,9 @@ class Matrix void swap(MatrixBase EIGEN_REF_TO_TEMPORARY other) { this->_swap(other.derived()); } + inline int innerStride() const { return 1; } + inline int outerStride() const { return this->innerSize(); } + /////////// Geometry module /////////// template @@ -331,6 +334,9 @@ class Matrix #endif protected: + template + friend struct ei_conservative_resize_like_impl; + using Base::m_storage; }; diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h index 046670452..ece603ffa 100644 --- a/Eigen/src/Core/MatrixStorage.h +++ b/Eigen/src/Core/MatrixStorage.h @@ -3,6 +3,7 @@ // // Copyright (C) 2008 Gael Guennebaud // Copyright (C) 2006-2009 Benoit Jacob +// Copyright (C) 2010 Hauke Heibel // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -49,6 +50,12 @@ struct ei_matrix_array ei_matrix_array(ei_constructor_without_unaligned_array_assert) {} }; +// FIXME!!! This is a hack because ARM gcc does not honour __attribute__((aligned(16))) properly +#ifdef __ARM_NEON__ + #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT + #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT + #endif +#endif #ifdef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) #else @@ -92,6 +99,7 @@ template class ei_matr inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); } inline static int rows(void) {return _Rows;} inline static int cols(void) {return _Cols;} + inline void conservativeResize(int,int,int) {} inline void resize(int,int,int) {} inline const T *data() const { return m_data.array; } inline T *data() { return m_data.array; } @@ -107,6 +115,7 @@ template class ei_matrix_storage inline void swap(ei_matrix_storage& ) {} inline static int rows(void) {return _Rows;} inline static int cols(void) {return _Cols;} + inline void conservativeResize(int,int,int) {} inline void resize(int,int,int) {} inline const T *data() const { return 0; } inline T *data() { return 0; } @@ -127,11 +136,8 @@ template class ei_matrix_storage class ei_matrix_storage< inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } inline int rows(void) const {return m_rows;} inline int cols(void) const {return _Cols;} - inline void resize(int /*size*/, int rows, int) - { - m_rows = rows; - } + inline void conservativeResize(int, int rows, int) { m_rows = rows; } + inline void resize(int, int rows, int) { m_rows = rows; } inline const T *data() const { return m_data.array; } inline T *data() { return m_data.array; } }; @@ -170,10 +174,8 @@ template class ei_matrix_storage< inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } inline int rows(void) const {return _Rows;} inline int cols(void) const {return m_cols;} - inline void resize(int, int, int cols) - { - m_cols = cols; - } + inline void conservativeResize(int, int, int cols) { m_cols = cols; } + inline void resize(int, int, int cols) { m_cols = cols; } inline const T *data() const { return m_data.array; } inline T *data() { return m_data.array; } }; @@ -196,6 +198,12 @@ template class ei_matrix_storage(m_data, size, m_rows*m_cols); + m_rows = rows; + m_cols = cols; + } void resize(int size, int rows, int cols) { if(size != m_rows*m_cols) @@ -228,6 +236,11 @@ template class ei_matrix_storage(m_data, size, _Rows*m_cols); + m_cols = cols; + } void resize(int size, int, int cols) { if(size != _Rows*m_cols) @@ -259,6 +272,11 @@ template class ei_matrix_storage(m_data, size, m_rows*_Cols); + m_rows = rows; + } void resize(int size, int rows, int) { if(size != m_rows*_Cols) diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h index 9f6d1c0c0..497ce828b 100644 --- a/Eigen/src/Core/NestByValue.h +++ b/Eigen/src/Core/NestByValue.h @@ -53,7 +53,8 @@ template class NestByValue inline int rows() const { return m_expression.rows(); } inline int cols() const { return m_expression.cols(); } - inline int stride() const { return m_expression.stride(); } + inline int outerStride() const { return m_expression.outerStride(); } + inline int innerStride() const { return m_expression.innerStride(); } inline const CoeffReturnType coeff(int row, int col) const { diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index fcd2e46cc..46884dc3f 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -47,7 +47,7 @@ * \sa class DiagonalMatrix */ template class PermutationMatrix; -template struct ei_permut_matrix_product_retval; +template struct ei_permut_matrix_product_retval; template struct ei_traits > @@ -132,6 +132,9 @@ class PermutationMatrix : public EigenBase void evalTo(MatrixBase& other) const @@ -213,16 +216,29 @@ class PermutationMatrix : public EigenBase inverse() const + { return *this; } + /** \returns the tranpose permutation matrix. + * + * \note \note_try_to_help_rvo + */ + inline Transpose transpose() const + { return *this; } + + /**** multiplication helpers to hopefully get RVO ****/ #ifndef EIGEN_PARSED_BY_DOXYGEN - protected: - enum Inverse_t {Inverse}; - PermutationMatrix(Inverse_t, const PermutationMatrix& other) - : m_indices(other.m_indices.size()) + template + PermutationMatrix(const Transpose >& other) + : m_indices(other.nestedPermutation().size()) { - for (int i=0; i& other) const { return PermutationMatrix(Product, *this, other); } + /** \returns the product of a permutation with another inverse permutation. + * + * \note \note_try_to_help_rvo + */ + template + inline PermutationMatrix operator*(const Transpose >& other) const + { return PermutationMatrix(Product, *this, other.eval()); } + + /** \returns the product of an inverse permutation with another permutation. + * + * \note \note_try_to_help_rvo + */ + template friend + inline PermutationMatrix operator*(const Transpose >& other, const PermutationMatrix& perm) + { return PermutationMatrix(Product, other.eval(), perm); } + protected: IndicesType m_indices; @@ -277,15 +304,15 @@ operator*(const PermutationMatrix &perm (permutation, matrix.derived()); } -template -struct ei_traits > +template +struct ei_traits > { typedef typename MatrixType::PlainObject ReturnType; }; -template +template struct ei_permut_matrix_product_retval - : public ReturnByValue > + : public ReturnByValue > { typedef typename ei_cleantype::type MatrixTypeNestedCleaned; @@ -299,21 +326,46 @@ struct ei_permut_matrix_product_retval template inline void evalTo(Dest& dst) const { const int n = Side==OnTheLeft ? rows() : cols(); - for(int i = 0; i < n; ++i) + + if(ei_is_same_type::ret && ei_extract_data(dst) == ei_extract_data(m_matrix)) { - Block< - Dest, - Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, - Side==OnTheRight ? 1 : Dest::ColsAtCompileTime - >(dst, Side==OnTheLeft ? m_permutation.indices().coeff(i) : i) + // apply the permutation inplace + Matrix mask(m_permutation.size()); + mask.fill(false); + int r = 0; + while(r < m_permutation.size()) + { + // search for the next seed + while(r=m_permutation.size()) + break; + // we got one, let's follow it until we are back to the seed + int k0 = r++; + int kPrev = k0; + mask.coeffRef(k0) = true; + for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k)) + { + Block(dst, k) + .swap(Block + (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); - = + mask.coeffRef(k) = true; + kPrev = k; + } + } + } + else + { + for(int i = 0; i < n; ++i) + { + Block + (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i) - Block< - MatrixTypeNestedCleaned, - Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime, - Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime - >(m_matrix, Side==OnTheRight ? m_permutation.indices().coeff(i) : i); + = + + Block + (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i); + } } } @@ -322,4 +374,78 @@ struct ei_permut_matrix_product_retval const typename MatrixType::Nested m_matrix; }; +/* Template partial specialization for transposed/inverse permutations */ + +template +struct ei_traits > > + : ei_traits > +{}; + +template +class Transpose > + : public EigenBase > > +{ + typedef PermutationMatrix PermutationType; + typedef typename PermutationType::IndicesType IndicesType; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef ei_traits Traits; + typedef Matrix + DenseMatrixType; + enum { + Flags = Traits::Flags, + CoeffReadCost = Traits::CoeffReadCost, + RowsAtCompileTime = Traits::RowsAtCompileTime, + ColsAtCompileTime = Traits::ColsAtCompileTime, + MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + }; + typedef typename Traits::Scalar Scalar; + #endif + + Transpose(const PermutationType& p) : m_permutation(p) {} + + inline int rows() const { return m_permutation.rows(); } + inline int cols() const { return m_permutation.cols(); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + void evalTo(MatrixBase& other) const + { + other.setZero(); + for (int i=0; i friend + inline const ei_permut_matrix_product_retval + operator*(const MatrixBase& matrix, const Transpose& trPerm) + { + return ei_permut_matrix_product_retval(trPerm.m_permutation, matrix.derived()); + } + + /** \returns the matrix with the inverse permutation applied to the rows. + */ + template + inline const ei_permut_matrix_product_retval + operator*(const MatrixBase& matrix) const + { + return ei_permut_matrix_product_retval(m_permutation, matrix.derived()); + } + + const PermutationType& nestedPermutation() const { return m_permutation; } + + protected: + const PermutationType& m_permutation; +}; + #endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index fe6d29c7d..865387b11 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -68,9 +68,9 @@ template struct ei_product_type // is to work around an internal compiler error with gcc 4.1 and 4.2. private: enum { - rows_select = Rows >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Rows==1 ? 1 : Small), - cols_select = Cols >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Cols==1 ? 1 : Small), - depth_select = Depth>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Depth==1 ? 1 : Small) + rows_select = Rows >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Rows==1 ? 1 : Small), + cols_select = Cols >=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Cols==1 ? 1 : Small), + depth_select = Depth>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ? Large : (Depth==1 ? 1 : Small) }; typedef ei_product_type_selector product_type_selector; @@ -93,12 +93,12 @@ template<> struct ei_product_type_selector template<> struct ei_product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; template<> struct ei_product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; template<> struct ei_product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; -template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = GemvProduct }; }; +template<> struct ei_product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; }; template<> struct ei_product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; }; -template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = GemvProduct }; }; -template<> struct ei_product_type_selector { enum { ret = GemvProduct }; }; +template<> struct ei_product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; }; +template<> struct ei_product_type_selector { enum { ret = CoeffBasedProductMode }; }; template<> struct ei_product_type_selector { enum { ret = GemvProduct }; }; -template<> struct ei_product_type_selector { enum { ret = GemvProduct }; }; +template<> struct ei_product_type_selector { enum { ret = CoeffBasedProductMode }; }; template<> struct ei_product_type_selector { enum { ret = GemmProduct }; }; template<> struct ei_product_type_selector { enum { ret = GemmProduct }; }; template<> struct ei_product_type_selector { enum { ret = GemmProduct }; }; @@ -427,6 +427,10 @@ template inline const typename ProductReturnType::Type MatrixBase::operator*(const MatrixBase &other) const { + // A note regarding the function declaration: In MSVC, this function will sometimes + // not be inlined since ei_matrix_storage is an unwindable object for dynamic + // matrices and product types are holding a member to store the result. + // Thus it does not help tagging this function with EIGEN_STRONG_INLINE. enum { ProductIsValid = Derived::ColsAtCompileTime==Dynamic || OtherDerived::RowsAtCompileTime==Dynamic diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index d5b0c60c2..75297dcb9 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -40,7 +40,7 @@ struct ei_redux_traits private: enum { PacketSize = ei_packet_traits::size, - InnerMaxSize = int(Derived::Flags)&RowMajorBit + InnerMaxSize = int(Derived::IsRowMajor) ? Derived::MaxColsAtCompileTime : Derived::MaxRowsAtCompileTime }; @@ -100,15 +100,15 @@ template struct ei_redux_novec_unroller { enum { - col = Start / Derived::RowsAtCompileTime, - row = Start % Derived::RowsAtCompileTime + outer = Start / Derived::InnerSizeAtCompileTime, + inner = Start % Derived::InnerSizeAtCompileTime }; typedef typename Derived::Scalar Scalar; EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&) { - return mat.coeff(row, col); + return mat.coeffByOuterInner(outer, inner); } }; @@ -148,12 +148,8 @@ struct ei_redux_vec_unroller { enum { index = Start * ei_packet_traits::size, - row = int(Derived::Flags)&RowMajorBit - ? index / int(Derived::ColsAtCompileTime) - : index % Derived::RowsAtCompileTime, - col = int(Derived::Flags)&RowMajorBit - ? index % int(Derived::ColsAtCompileTime) - : index / Derived::RowsAtCompileTime, + outer = index / int(Derived::InnerSizeAtCompileTime), + inner = index % int(Derived::InnerSizeAtCompileTime), alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned }; @@ -162,7 +158,7 @@ struct ei_redux_vec_unroller EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&) { - return mat.template packet(row, col); + return mat.template packetByOuterInner(outer, inner); } }; @@ -184,12 +180,12 @@ struct ei_redux_impl { ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix"); Scalar res; - res = mat.coeff(0, 0); - for(int i = 1; i < mat.rows(); ++i) - res = func(res, mat.coeff(i, 0)); - for(int j = 1; j < mat.cols(); ++j) - for(int i = 0; i < mat.rows(); ++i) - res = func(res, mat.coeff(i, j)); + res = mat.coeffByOuterInner(0, 0); + for(int i = 1; i < mat.innerSize(); ++i) + res = func(res, mat.coeffByOuterInner(0, i)); + for(int i = 1; i < mat.outerSize(); ++i) + for(int j = 0; j < mat.innerSize(); ++j) + res = func(res, mat.coeffByOuterInner(i, j)); return res; } }; @@ -253,8 +249,7 @@ struct ei_redux_impl const int innerSize = mat.innerSize(); const int outerSize = mat.outerSize(); enum { - packetSize = ei_packet_traits::size, - isRowMajor = Derived::Flags&RowMajorBit?1:0 + packetSize = ei_packet_traits::size }; const int packetedInnerSize = ((innerSize)/packetSize)*packetSize; Scalar res; @@ -263,13 +258,12 @@ struct ei_redux_impl PacketScalar packet_res = mat.template packet(0,0); for(int j=0; j - (isRowMajor?j:i, isRowMajor?i:j)); + packet_res = func.packetOp(packet_res, mat.template packetByOuterInner(j,i)); res = func.predux(packet_res); for(int j=0; j > : public ei_traits::ReturnType> { enum { - // FIXME had to remove the DirectAccessBit for usage like - // matrix.inverse().block(...) - // because the Block ctor with direct access - // wants to call coeffRef() to get an address, and that fails (infinite recursion) as ReturnByValue - // doesnt implement coeffRef(). - // The fact that I had to do that shows that when doing xpr.block() with a non-direct-access xpr, - // even if xpr has the EvalBeforeNestingBit, the block() doesn't use direct access on the evaluated - // xpr. + // We're disabling the DirectAccess because e.g. the constructor of + // the Block-with-DirectAccess expression requires to have a coeffRef method. + // Also, we don't want to have to implement the stride stuff. Flags = (ei_traits::ReturnType>::Flags | EvalBeforeNestingBit) & ~DirectAccessBit }; diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index c3c5b17ff..0b57b9968 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -75,8 +75,9 @@ template class SelfAdjointView inline int rows() const { return m_matrix.rows(); } inline int cols() const { return m_matrix.cols(); } - inline int stride() const { return m_matrix.stride(); } - + inline int outerStride() const { return m_matrix.outerStride(); } + inline int innerStride() const { return m_matrix.innerStride(); } + /** \sa MatrixBase::coeff() * \warning the coordinates must fit into the referenced triangular part */ diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index d2690b66b..529a9994d 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -57,7 +57,8 @@ template class SelfCwiseBinaryOp inline int rows() const { return m_matrix.rows(); } inline int cols() const { return m_matrix.cols(); } - inline int stride() const { return m_matrix.stride(); } + inline int outerStride() const { return m_matrix.outerStride(); } + inline int innerStride() const { return m_matrix.innerStride(); } inline const Scalar* data() const { return m_matrix.data(); } // note that this function is needed by assign to correctly align loads/stores diff --git a/Eigen/src/Core/Stride.h b/Eigen/src/Core/Stride.h new file mode 100644 index 000000000..d960dd2fc --- /dev/null +++ b/Eigen/src/Core/Stride.h @@ -0,0 +1,113 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Benoit Jacob +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_STRIDE_H +#define EIGEN_STRIDE_H + +/** \class Stride + * + * \brief Holds strides information for Map + * + * This class holds the strides information for mapping arrays with strides with class Map. + * + * It holds two values: the inner stride and the outer stride. + * + * The inner stride is the pointer increment between two consecutive entries within a given row of a + * row-major matrix or within a given column of a column-major matrix. + * + * The outer stride is the pointer increment between two consecutive rows of a row-major matrix or + * between two consecutive columns of a column-major matrix. + * + * These two values can be passed either at compile-time as template parameters, or at runtime as + * arguments to the constructor. + * + * Indeed, this class takes two template parameters: + * \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime. + * \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime. + * + * \include Map_general_stride.cpp + * Output: \verbinclude Map_general_stride.out + * + * \sa class InnerStride, class OuterStride + */ +template +class Stride +{ + public: + + enum { + InnerStrideAtCompileTime = _InnerStrideAtCompileTime, + OuterStrideAtCompileTime = _OuterStrideAtCompileTime + }; + + /** Default constructor, for use when strides are fixed at compile time */ + Stride() + : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime) + { + ei_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic); + } + + /** Constructor allowing to pass the strides at runtime */ + Stride(int outerStride, int innerStride) + : m_outer(outerStride), m_inner(innerStride) + { + ei_assert(innerStride>=0 && outerStride>=0); + } + + /** Copy constructor */ + Stride(const Stride& other) + : m_outer(other.outer()), m_inner(other.inner()) + {} + + /** \returns the outer stride */ + inline int outer() const { return m_outer.value(); } + /** \returns the inner stride */ + inline int inner() const { return m_inner.value(); } + + protected: + ei_int_if_dynamic m_outer; + ei_int_if_dynamic m_inner; +}; + +/** \brief Convenience specialization of Stride to specify only an inner stride */ +template +class InnerStride : public Stride<0, Value> +{ + typedef Stride<0, Value> Base; + public: + InnerStride() : Base() {} + InnerStride(int v) : Base(0, v) {} +}; + +/** \brief Convenience specialization of Stride to specify only an outer stride */ +template +class OuterStride : public Stride +{ + typedef Stride Base; + public: + OuterStride() : Base() {} + OuterStride(int v) : Base(v,0) {} +}; + +#endif // EIGEN_STRIDE_H diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h index 186268af0..c3c641097 100644 --- a/Eigen/src/Core/Swap.h +++ b/Eigen/src/Core/Swap.h @@ -47,7 +47,8 @@ template class SwapWrapper inline int rows() const { return m_expression.rows(); } inline int cols() const { return m_expression.cols(); } - inline int stride() const { return m_expression.stride(); } + inline int outerStride() const { return m_expression.outerStride(); } + inline int innerStride() const { return m_expression.innerStride(); } inline Scalar& coeffRef(int row, int col) { @@ -60,7 +61,7 @@ template class SwapWrapper } template - void copyCoeff(int row, int col, const MatrixBase& other) + void copyCoeff(int row, int col, const DenseBase& other) { OtherDerived& _other = other.const_cast_derived(); ei_internal_assert(row >= 0 && row < rows() @@ -71,7 +72,7 @@ template class SwapWrapper } template - void copyCoeff(int index, const MatrixBase& other) + void copyCoeff(int index, const DenseBase& other) { OtherDerived& _other = other.const_cast_derived(); ei_internal_assert(index >= 0 && index < m_expression.size()); @@ -81,7 +82,7 @@ template class SwapWrapper } template - void copyPacket(int row, int col, const MatrixBase& other) + void copyPacket(int row, int col, const DenseBase& other) { OtherDerived& _other = other.const_cast_derived(); ei_internal_assert(row >= 0 && row < rows() @@ -94,7 +95,7 @@ template class SwapWrapper } template - void copyPacket(int index, const MatrixBase& other) + void copyPacket(int index, const DenseBase& other) { OtherDerived& _other = other.const_cast_derived(); ei_internal_assert(index >= 0 && index < m_expression.size()); diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index bd06d8464..1f064d1c2 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -93,7 +93,8 @@ template class TransposeImpl typedef typename MatrixType::template MakeBase >::Type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Transpose) - inline int stride() const { return derived().nestedExpression().stride(); } + inline int innerStride() const { return derived().nestedExpression().innerStride(); } + inline int outerStride() const { return derived().nestedExpression().outerStride(); } inline Scalar* data() { return derived().nestedExpression().data(); } inline const Scalar* data() const { return derived().nestedExpression().data(); } @@ -295,25 +296,6 @@ struct ei_blas_traits > static inline const XprType extract(const XprType& x) { return x; } }; - -template::ActualAccess> -struct ei_extract_data_selector { - static typename T::Scalar* run(const T& m) - { - return &ei_blas_traits::extract(m).const_cast_derived().coeffRef(0,0); - } -}; - -template -struct ei_extract_data_selector { - static typename T::Scalar* run(const T&) { return 0; } -}; - -template typename T::Scalar* ei_extract_data(const T& m) -{ - return ei_extract_data_selector::run(m); -} - template struct ei_check_transpose_aliasing_selector { diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 7d978b800..2230680d1 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -50,7 +50,8 @@ template class TriangularBase : public EigenBase inline int rows() const { return derived().rows(); } inline int cols() const { return derived().cols(); } - inline int stride() const { return derived().stride(); } + inline int outerStride() const { return derived().outerStride(); } + inline int innerStride() const { return derived().innerStride(); } inline Scalar coeff(int row, int col) const { return derived().coeff(row,col); } inline Scalar& coeffRef(int row, int col) { return derived().coeffRef(row,col); } @@ -165,7 +166,8 @@ template class TriangularView inline int rows() const { return m_matrix.rows(); } inline int cols() const { return m_matrix.cols(); } - inline int stride() const { return m_matrix.stride(); } + inline int outerStride() const { return m_matrix.outerStride(); } + inline int innerStride() const { return m_matrix.innerStride(); } /** \sa MatrixBase::operator+=() */ template TriangularView& operator+=(const Other& other) { return *this = m_matrix + other; } diff --git a/Eigen/src/Core/VectorBlock.h b/Eigen/src/Core/VectorBlock.h index cbf97aeb3..5bb7fd35d 100644 --- a/Eigen/src/Core/VectorBlock.h +++ b/Eigen/src/Core/VectorBlock.h @@ -86,7 +86,6 @@ template class VectorBlock IsColVector ? start : 0, IsColVector ? 0 : start, IsColVector ? size : 1, IsColVector ? 1 : size) { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock); } diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 1526a4b97..449de2078 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -169,6 +169,11 @@ template<> inline v4f ei_pdiv(const v4f& a, const v4f& b) { return res; } +template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv(const Packet4i& /*a*/, const Packet4i& /*b*/) +{ ei_assert(false && "packet integer division are not supported by AltiVec"); + return ei_pset1(0); +} + template<> inline v4f ei_pmadd(const v4f& a, const v4f& b, const v4f& c) { return vec_madd(a, b, c); } template<> inline v4f ei_pmin(const v4f& a, const v4f& b) { return vec_min(a,b); } diff --git a/Eigen/src/Core/arch/CMakeLists.txt b/Eigen/src/Core/arch/CMakeLists.txt index 8ddba284e..5470ed8f3 100644 --- a/Eigen/src/Core/arch/CMakeLists.txt +++ b/Eigen/src/Core/arch/CMakeLists.txt @@ -1,2 +1,3 @@ ADD_SUBDIRECTORY(SSE) -ADD_SUBDIRECTORY(AltiVec) \ No newline at end of file +ADD_SUBDIRECTORY(AltiVec) +ADD_SUBDIRECTORY(NEON) diff --git a/Eigen/src/Core/arch/Default/Settings.h b/Eigen/src/Core/arch/Default/Settings.h new file mode 100644 index 000000000..1e7cebdba --- /dev/null +++ b/Eigen/src/Core/arch/Default/Settings.h @@ -0,0 +1,65 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + + +/* All the parameters defined in this file can be specialized in the + * architecture specific files, and/or by the user. + * More to come... */ + +#ifndef EIGEN_DEFAULT_SETTINGS_H +#define EIGEN_DEFAULT_SETTINGS_H + +/** Defines the maximal loop size to enable meta unrolling of loops. + * Note that the value here is expressed in Eigen's own notion of "number of FLOPS", + * it does not correspond to the number of iterations or the number of instructions + */ +#ifndef EIGEN_UNROLLING_LIMIT +#define EIGEN_UNROLLING_LIMIT 100 +#endif + +/** Defines the threshold between a "small" and a "large" matrix. + * This threshold is mainly used to select the proper product implementation. + */ +#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD +#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 +#endif + +/** Defines the maximal size in Bytes of blocks fitting in CPU cache. + * The current value is set to generate blocks of 256x256 for float + * + * Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes + */ +#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE +#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256) +#endif + +/** Defines the maximal width of the blocks used in the triangular product and solver + * for vectors (level 2 blas xTRMV and xTRSV). The default is 8. + */ +#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH +#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8 +#endif + +#endif // EIGEN_DEFAULT_SETTINGS_H diff --git a/Eigen/src/Core/arch/NEON/CMakeLists.txt b/Eigen/src/Core/arch/NEON/CMakeLists.txt new file mode 100644 index 000000000..fd4d4af50 --- /dev/null +++ b/Eigen/src/Core/arch/NEON/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Core_arch_NEON_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Core_arch_NEON_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/NEON COMPONENT Devel +) diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h new file mode 100644 index 000000000..f71b92a75 --- /dev/null +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -0,0 +1,372 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2010 Konstantinos Margaritis +// Heavily based on Gael's SSE version. +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_PACKET_MATH_NEON_H +#define EIGEN_PACKET_MATH_NEON_H + +#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD +#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 +#endif + +#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE +#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 4*96*96 +#endif + +typedef float32x4_t Packet4f; +typedef int32x4_t Packet4i; + +#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ + const Packet4f ei_p4f_##NAME = ei_pset1(X) + +#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ + const Packet4f ei_p4f_##NAME = vreinterpretq_f32_u32(ei_pset1(X)) + +#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ + const Packet4i ei_p4i_##NAME = ei_pset1(X) + +template<> struct ei_packet_traits : ei_default_packet_traits +{ + typedef Packet4f type; enum {size=4}; + enum { + HasSin = 0, + HasCos = 0, + HasLog = 0, + HasExp = 0, + HasSqrt = 0 + }; +}; +template<> struct ei_packet_traits : ei_default_packet_traits +{ typedef Packet4i type; enum {size=4}; }; + +template<> struct ei_unpacket_traits { typedef float type; enum {size=4}; }; +template<> struct ei_unpacket_traits { typedef int type; enum {size=4}; }; + +template<> EIGEN_STRONG_INLINE Packet4f ei_pset1(const float& from) { return vdupq_n_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i ei_pset1(const int& from) { return vdupq_n_s32(from); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_plset(const float& a) +{ + Packet4f countdown = { 3, 2, 1, 0 }; + return vaddq_f32(ei_pset1(a), countdown); +} +template<> EIGEN_STRONG_INLINE Packet4i ei_plset(const int& a) +{ + Packet4i countdown = { 3, 2, 1, 0 }; + return vaddq_s32(ei_pset1(a), countdown); +} + +template<> EIGEN_STRONG_INLINE Packet4f ei_padd(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i ei_padd(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_psub(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i ei_psub(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_pnegate(const Packet4f& a) { return vnegq_f32(a); } +template<> EIGEN_STRONG_INLINE Packet4i ei_pnegate(const Packet4i& a) { return vnegq_s32(a); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_pmul(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i ei_pmul(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv(const Packet4f& a, const Packet4f& b) +{ + Packet4f inv, restep, div; + + // NEON does not offer a divide instruction, we have to do a reciprocal approximation + // However NEON in contrast to other SIMD engines (AltiVec/SSE), offers + // a reciprocal estimate AND a reciprocal step -which saves a few instructions + // vrecpeq_f32() returns an estimate to 1/b, which we will finetune with + // Newton-Raphson and vrecpsq_f32() + inv = vrecpeq_f32(b); + + // This returns a differential, by which we will have to multiply inv to get a better + // approximation of 1/b. + restep = vrecpsq_f32(b, inv); + inv = vmulq_f32(restep, inv); + + // Finally, multiply a by 1/b and get the wanted result of the division. + div = vmulq_f32(a, inv); + + return div; +} +template<> EIGEN_STRONG_INLINE Packet4i ei_pdiv(const Packet4i& /*a*/, const Packet4i& /*b*/) +{ ei_assert(false && "packet integer division are not supported by NEON"); + return ei_pset1(0); +} + +// for some weird raisons, it has to be overloaded for packet of integers +template<> EIGEN_STRONG_INLINE Packet4i ei_pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return ei_padd(ei_pmul(a,b), c); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_pmin(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i ei_pmin(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_pmax(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i ei_pmax(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); } + +// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics +template<> EIGEN_STRONG_INLINE Packet4f ei_pand(const Packet4f& a, const Packet4f& b) +{ + return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); +} +template<> EIGEN_STRONG_INLINE Packet4i ei_pand(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_por(const Packet4f& a, const Packet4f& b) +{ + return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); +} +template<> EIGEN_STRONG_INLINE Packet4i ei_por(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_pxor(const Packet4f& a, const Packet4f& b) +{ + return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); +} +template<> EIGEN_STRONG_INLINE Packet4i ei_pxor(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_pandnot(const Packet4f& a, const Packet4f& b) +{ + return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); +} +template<> EIGEN_STRONG_INLINE Packet4i ei_pandnot(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i ei_pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } + +template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } +template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } + +template<> EIGEN_STRONG_INLINE void ei_pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void ei_pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } + +template<> EIGEN_STRONG_INLINE void ei_pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } +template<> EIGEN_STRONG_INLINE void ei_pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } + +// FIXME only store the 2 first elements ? +template<> EIGEN_STRONG_INLINE float ei_pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } +template<> EIGEN_STRONG_INLINE int ei_pfirst(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } + +template<> EIGEN_STRONG_INLINE Packet4f ei_preverse(const Packet4f& a) { + float32x2_t a_lo, a_hi; + Packet4f a_r64, a_r128; + + a_r64 = vrev64q_f32(a); + a_lo = vget_low_f32(a_r64); + a_hi = vget_high_f32(a_r64); + a_r128 = vcombine_f32(a_hi, a_lo); + + return a_r128; +} +template<> EIGEN_STRONG_INLINE Packet4i ei_preverse(const Packet4i& a) { + int32x2_t a_lo, a_hi; + Packet4i a_r64, a_r128; + + a_r64 = vrev64q_s32(a); + a_lo = vget_low_s32(a_r64); + a_hi = vget_high_s32(a_r64); + a_r128 = vcombine_s32(a_hi, a_lo); + + return a_r128; +} +template<> EIGEN_STRONG_INLINE Packet4f ei_pabs(const Packet4f& a) { return vabsq_f32(a); } +template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { return vabsq_s32(a); } + +template<> EIGEN_STRONG_INLINE float ei_predux(const Packet4f& a) +{ + float32x2_t a_lo, a_hi, sum; + float s[2]; + + a_lo = vget_low_f32(a); + a_hi = vget_high_f32(a); + sum = vpadd_f32(a_lo, a_hi); + sum = vpadd_f32(sum, sum); + vst1_f32(s, sum); + + return s[0]; +} + +template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp(const Packet4f* vecs) +{ + float32x4x2_t vtrn1, vtrn2, res1, res2; + Packet4f sum1, sum2, sum; + + // NEON zip performs interleaving of the supplied vectors. + // We perform two interleaves in a row to acquire the transposed vector + vtrn1 = vzipq_f32(vecs[0], vecs[2]); + vtrn2 = vzipq_f32(vecs[1], vecs[3]); + res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]); + res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]); + + // Do the addition of the resulting vectors + sum1 = vaddq_f32(res1.val[0], res1.val[1]); + sum2 = vaddq_f32(res2.val[0], res2.val[1]); + sum = vaddq_f32(sum1, sum2); + + return sum; +} + +template<> EIGEN_STRONG_INLINE int ei_predux(const Packet4i& a) +{ + int32x2_t a_lo, a_hi, sum; + int32_t s[2]; + + a_lo = vget_low_s32(a); + a_hi = vget_high_s32(a); + sum = vpadd_s32(a_lo, a_hi); + sum = vpadd_s32(sum, sum); + vst1_s32(s, sum); + + return s[0]; +} + +template<> EIGEN_STRONG_INLINE Packet4i ei_preduxp(const Packet4i* vecs) +{ + int32x4x2_t vtrn1, vtrn2, res1, res2; + Packet4i sum1, sum2, sum; + + // NEON zip performs interleaving of the supplied vectors. + // We perform two interleaves in a row to acquire the transposed vector + vtrn1 = vzipq_s32(vecs[0], vecs[2]); + vtrn2 = vzipq_s32(vecs[1], vecs[3]); + res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]); + res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]); + + // Do the addition of the resulting vectors + sum1 = vaddq_s32(res1.val[0], res1.val[1]); + sum2 = vaddq_s32(res2.val[0], res2.val[1]); + sum = vaddq_s32(sum1, sum2); + + return sum; +} + +// Other reduction functions: +// mul +template<> EIGEN_STRONG_INLINE float ei_predux_mul(const Packet4f& a) +{ + float32x2_t a_lo, a_hi, prod; + float s[2]; + + // Get a_lo = |a1|a2| and a_hi = |a3|a4| + a_lo = vget_low_f32(a); + a_hi = vget_high_f32(a); + // Get the product of a_lo * a_hi -> |a1*a3|a2*a4| + prod = vmul_f32(a_lo, a_hi); + // Multiply prod with its swapped value |a2*a4|a1*a3| + prod = vmul_f32(prod, vrev64_f32(prod)); + vst1_f32(s, prod); + + return s[0]; +} +template<> EIGEN_STRONG_INLINE int ei_predux_mul(const Packet4i& a) +{ + int32x2_t a_lo, a_hi, prod; + int32_t s[2]; + + // Get a_lo = |a1|a2| and a_hi = |a3|a4| + a_lo = vget_low_s32(a); + a_hi = vget_high_s32(a); + // Get the product of a_lo * a_hi -> |a1*a3|a2*a4| + prod = vmul_s32(a_lo, a_hi); + // Multiply prod with its swapped value |a2*a4|a1*a3| + prod = vmul_s32(prod, vrev64_s32(prod)); + vst1_s32(s, prod); + + return s[0]; +} + +// min +template<> EIGEN_STRONG_INLINE float ei_predux_min(const Packet4f& a) +{ + float32x2_t a_lo, a_hi, min; + float s[2]; + + a_lo = vget_low_f32(a); + a_hi = vget_high_f32(a); + min = vpmin_f32(a_lo, a_hi); + min = vpmin_f32(min, min); + vst1_f32(s, min); + + return s[0]; +} +template<> EIGEN_STRONG_INLINE int ei_predux_min(const Packet4i& a) +{ + int32x2_t a_lo, a_hi, min; + int32_t s[2]; + + a_lo = vget_low_s32(a); + a_hi = vget_high_s32(a); + min = vpmin_s32(a_lo, a_hi); + min = vpmin_s32(min, min); + vst1_s32(s, min); + + return s[0]; +} + +// max +template<> EIGEN_STRONG_INLINE float ei_predux_max(const Packet4f& a) +{ + float32x2_t a_lo, a_hi, max; + float s[2]; + + a_lo = vget_low_f32(a); + a_hi = vget_high_f32(a); + max = vpmax_f32(a_lo, a_hi); + max = vpmax_f32(max, max); + vst1_f32(s, max); + + return s[0]; +} +template<> EIGEN_STRONG_INLINE int ei_predux_max(const Packet4i& a) +{ + int32x2_t a_lo, a_hi, max; + int32_t s[2]; + + a_lo = vget_low_s32(a); + a_hi = vget_high_s32(a); + max = vpmax_s32(a_lo, a_hi); + max = vpmax_s32(max, max); + vst1_s32(s, max); + + return s[0]; +} + +template +struct ei_palign_impl +{ + EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second) + { + if (Offset!=0) + first = vextq_f32(first, second, Offset); + } +}; + +template +struct ei_palign_impl +{ + EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second) + { + if (Offset!=0) + first = vextq_s32(first, second, Offset); + } +}; +#endif // EIGEN_PACKET_MATH_NEON_H diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index de96aaffa..282a1971c 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -122,7 +122,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul(const Packet4f& a, con template<> EIGEN_STRONG_INLINE Packet2d ei_pmul(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i ei_pmul(const Packet4i& a, const Packet4i& b) { -#ifdef __SSE4_1__ +#ifdef EIGEN_VECTORIZE_SSE4_1 return _mm_mullo_epi32(a,b); #else // this version is slightly faster than 4 scalar products @@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a) } template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a) { - #ifdef __SSSE3__ + #ifdef EIGEN_VECTORIZE_SSSE3 return _mm_abs_epi32(a); #else Packet4i aux = _mm_srai_epi32(a,31); @@ -285,7 +285,7 @@ EIGEN_STRONG_INLINE void ei_punpackp(Packet4f* vecs) vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00)); } -#ifdef __SSE3__ +#ifdef EIGEN_VECTORIZE_SSE3 // TODO implement SSE2 versions as well as integer versions template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp(const Packet4f* vecs) { @@ -446,7 +446,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max(const Packet4i& a) // } #endif -#ifdef __SSSE3__ +#ifdef EIGEN_VECTORIZE_SSSE3 // SSSE3 versions template struct ei_palign_impl diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h index 3343b1875..e8016e915 100644 --- a/Eigen/src/Core/products/CoeffBasedProduct.h +++ b/Eigen/src/Core/products/CoeffBasedProduct.h @@ -305,10 +305,7 @@ struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Block::ColsAtCompileTime>, - Block::RowsAtCompileTime, 1>, - LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col)); + res = lhs.row(row).cwiseProduct(rhs.col(col)).sum(); } }; @@ -319,10 +316,7 @@ struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Lhs, - Block::RowsAtCompileTime, 1>, - LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col)); + res = lhs.cwiseProduct(rhs.col(col)).sum(); } }; @@ -331,10 +325,7 @@ struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Block::ColsAtCompileTime>, - Rhs, - LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs); + res = lhs.row(row).cwiseProduct(rhs).sum(); } }; @@ -343,10 +334,7 @@ struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Lhs, - Rhs, - LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs); + res = lhs.cwiseProduct(rhs).sum(); } }; diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 6836a10de..8ac5afb05 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -31,6 +31,7 @@ #define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C); #else #define CJMADD(A,B,C,T) T = B; T = cj.pmul(A,T); C = ei_padd(C,T); +// #define CJMADD(A,B,C,T) T = A; T = cj.pmul(T,B); C = ei_padd(C,T); #endif // optimized GEneral packed Block * packed Panel product kernel @@ -146,7 +147,7 @@ struct ei_gebp_kernel #endif // performs "inner" product - // TODO let's check wether the flowing peeled loop could not be + // TODO let's check wether the folowing peeled loop could not be // optimized via optimal prefetching from one loop to the other const Scalar* blB = unpackedB; for(int k=0; k * RhsBlasTraits::extractScalarFactor(m_rhs); ei_product_selfadjoint_matrix::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, NumTraits::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), EIGEN_LOGICAL_XOR(RhsIsUpper, diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 2ca463d5d..4d216d77a 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -236,4 +236,22 @@ struct ei_blas_traits > static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); } }; +template::ActualAccess> +struct ei_extract_data_selector { + static const typename T::Scalar* run(const T& m) + { + return &ei_blas_traits::extract(m).const_cast_derived().coeffRef(0,0); // FIXME this should be .data() + } +}; + +template +struct ei_extract_data_selector { + static typename T::Scalar* run(const T&) { return 0; } +}; + +template const typename T::Scalar* ei_extract_data(const T& m) +{ + return ei_extract_data_selector::run(m); +} + #endif // EIGEN_BLASUTIL_H diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 51590b03d..c167df697 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -86,11 +86,11 @@ const unsigned int EvalBeforeAssigningBit = 0x4; * Long version: means that the coefficients can be handled by packets * and start at a memory location whose alignment meets the requirements * of the present CPU architecture for optimized packet access. In the fixed-size - * case, there is the additional condition that the total size of the coefficients - * array is a multiple of the packet size, so that it is possible to access all the - * coefficients by packets. In the dynamic-size case, there is no such condition - * on the total size, so it might not be possible to access the few last coeffs - * by packets. + * case, there is the additional condition that it be possible to access all the + * coefficients by packets (this implies the requirement that the size be a multiple of 16 bytes, + * and that any nontrivial strides don't break the alignment). In the dynamic-size case, + * there is no such condition on the total size and strides, so it might not be possible to access + * all coeffs by packets. * * \note This bit can be set regardless of whether vectorization is actually enabled. * To check for actual vectorizability, see \a ActualPacketAccessBit. @@ -140,7 +140,7 @@ const unsigned int LinearAccessBit = 0x10; * Means that the underlying array of coefficients can be directly accessed. This means two things. * First, references to the coefficients must be available through coeffRef(int, int). This rules out read-only * expressions whose coefficients are computed on demand by coeff(int, int). Second, the memory layout of the - * array of coefficients must be exactly the natural one suggested by rows(), cols(), stride(), and the RowMajorBit. + * array of coefficients must be exactly the natural one suggested by rows(), cols(), outerStride(), innerStride(), and the RowMajorBit. * This rules out expressions such as Diagonal, whose coefficients, though referencable, do not have * such a regular memory layout. */ diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index a56e4fb84..aa01fdab2 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -1,7 +1,8 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2007-2009 Benoit Jacob +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008-2009 Gael Guennebaud // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -40,8 +41,10 @@ template class NestByValue; template class ForceAlignedAccess; template class SwapWrapper; template class Minor; +// MSVC will not compile when the expression ei_traits::Flags&DirectAccessBit +// is put into brackets like (ei_traits::Flags&DirectAccessBit)! template::Flags&DirectAccessBit) ? HasDirectAccess : NoDirectAccess> class Block; + int _DirectAccessStatus = ei_traits::Flags&DirectAccessBit ? HasDirectAccess : NoDirectAccess> class Block; template class VectorBlock; template class Transpose; template class Conjugate; @@ -60,7 +63,9 @@ template class DiagonalProduct; template class Diagonal; -template class Map; +template class Stride; +template > class Map; + template class TriangularBase; template class TriangularView; template class SelfAdjointView; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index ba92f2370..7968d6604 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -39,7 +39,7 @@ // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable 16 byte alignment on all // platforms where vectorization might be enabled. In theory we could always enable alignment, but it can be a cause of problems // on some platforms, so we just disable it in certain common platform (compiler+architecture combinations) to avoid these problems. -#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__)) +#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__) || defined(__ARM_NEON__)) #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT 1 #else #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT 0 @@ -78,30 +78,6 @@ #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ColMajor #endif -/** Defines the maximal loop size to enable meta unrolling of loops. - * Note that the value here is expressed in Eigen's own notion of "number of FLOPS", - * it does not correspond to the number of iterations or the number of instructions - */ -#ifndef EIGEN_UNROLLING_LIMIT -#define EIGEN_UNROLLING_LIMIT 100 -#endif - -/** Defines the maximal size in Bytes of blocks fitting in CPU cache. - * The current value is set to generate blocks of 256x256 for float - * - * Typically for a single-threaded application you would set that to 25% of the size of your CPU caches in bytes - */ -#ifndef EIGEN_TUNE_FOR_CPU_CACHE_SIZE -#define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256) -#endif - -/** Defines the maximal width of the blocks used in the triangular product and solver - * for vectors (level 2 blas xTRMV and xTRSV). The default is 8. - */ -#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH -#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8 -#endif - /** Allows to disable some optimizations which might affect the accuracy of the result. * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. * They currently include: @@ -211,7 +187,7 @@ using Eigen::ei_cos; */ #if !EIGEN_ALIGN #define EIGEN_ALIGN_TO_BOUNDARY(n) -#elif (defined __GNUC__) +#elif (defined __GNUC__) || (defined __PGI) #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) #elif (defined _MSC_VER) #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index c7b95d334..4d037b998 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -4,6 +4,7 @@ // Copyright (C) 2008 Gael Guennebaud // Copyright (C) 2008-2009 Benoit Jacob // Copyright (C) 2009 Kenneth Riddile +// Copyright (C) 2010 Hauke Heibel // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -24,26 +25,49 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . + +/***************************************************************************** +*** Platform checks for aligned malloc functions *** +*****************************************************************************/ + #ifndef EIGEN_MEMORY_H #define EIGEN_MEMORY_H -// FreeBSD 6 seems to have 16-byte aligned malloc -// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup -// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures -// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup -#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__) -#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1 +// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see: +// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html +// This is true at least since glibc 2.8. +// This leaves the question how to detect 64-bit. According to this document, +// http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf +// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed +// quite safe, at least within the context of glibc, to equate 64-bit with LP64. +#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \ + && defined(__LP64__) + #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1 #else -#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0 + #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0 #endif -#if defined(__APPLE__) || defined(_WIN64) || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED +// FreeBSD 6 seems to have 16-byte aligned malloc +// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup +// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures +// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup +#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__) + #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1 +#else + #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0 +#endif + +#if defined(__APPLE__) \ + || defined(_WIN64) \ + || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \ + || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED #define EIGEN_MALLOC_ALREADY_ALIGNED 1 #else #define EIGEN_MALLOC_ALREADY_ALIGNED 0 #endif -#if ((defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0) +#if ((defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \ + && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0) #define EIGEN_HAS_POSIX_MEMALIGN 1 #else #define EIGEN_HAS_POSIX_MEMALIGN 0 @@ -55,26 +79,90 @@ #define EIGEN_HAS_MM_MALLOC 0 #endif -/** \internal like malloc, but the returned pointer is guaranteed to be 16-byte aligned. - * Fast, but wastes 16 additional bytes of memory. - * Does not throw any exception. +/***************************************************************************** +*** Implementation of handmade aligned functions *** +*****************************************************************************/ + +/* ----- Hand made implementations of aligned malloc/free and realloc ----- */ + +/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned. + * Fast, but wastes 16 additional bytes of memory. Does not throw any exception. */ inline void* ei_handmade_aligned_malloc(size_t size) { void *original = std::malloc(size+16); + if (original == 0) return 0; void *aligned = reinterpret_cast((reinterpret_cast(original) & ~(size_t(15))) + 16); *(reinterpret_cast(aligned) - 1) = original; return aligned; } -/** \internal frees memory allocated with ei_handmade_aligned_malloc */ +/** \internal Frees memory allocated with ei_handmade_aligned_malloc */ inline void ei_handmade_aligned_free(void *ptr) { - if(ptr) - std::free(*(reinterpret_cast(ptr) - 1)); + if (ptr) std::free(*(reinterpret_cast(ptr) - 1)); } -/** \internal allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment. +/** \internal + * \brief Reallocates aligned memory. + * Since we know that our handmade version is based on std::realloc + * we can use std::realloc to implement efficient reallocation. + */ +inline void* ei_handmade_aligned_realloc(void* ptr, size_t size, size_t = 0) +{ + if (ptr == 0) return ei_handmade_aligned_malloc(size); + void *original = *(reinterpret_cast(ptr) - 1); + original = std::realloc(original,size+16); + if (original == 0) return 0; + void *aligned = reinterpret_cast((reinterpret_cast(original) & ~(size_t(15))) + 16); + *(reinterpret_cast(aligned) - 1) = original; + return aligned; +} + +/***************************************************************************** +*** Implementation of generic aligned realloc (when no realloc can be used)*** +*****************************************************************************/ + +void* ei_aligned_malloc(size_t size); +void ei_aligned_free(void *ptr); + +/** \internal + * \brief Reallocates aligned memory. + * Allows reallocation with aligned ptr types. This implementation will + * always create a new memory chunk and copy the old data. + */ +inline void* ei_generic_aligned_realloc(void* ptr, size_t size, size_t old_size) +{ + if (ptr==0) + return ei_aligned_malloc(size); + + if (size==0) + { + ei_aligned_free(ptr); + return 0; + } + + void* newptr = ei_aligned_malloc(size); + if (newptr == 0) + { + errno = ENOMEM; // according to the standard + return 0; + } + + if (ptr != 0) + { + std::memcpy(newptr, ptr, std::min(size,old_size)); + ei_aligned_free(ptr); + } + + return newptr; +} + +/***************************************************************************** +*** Implementation of portable aligned versions of malloc/free/realloc *** +*****************************************************************************/ + +/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment. * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown. */ inline void* ei_aligned_malloc(size_t size) @@ -85,9 +173,9 @@ inline void* ei_aligned_malloc(size_t size) void *result; #if !EIGEN_ALIGN - result = malloc(size); + result = std::malloc(size); #elif EIGEN_MALLOC_ALREADY_ALIGNED - result = malloc(size); + result = std::malloc(size); #elif EIGEN_HAS_POSIX_MEMALIGN if(posix_memalign(&result, 16, size)) result = 0; #elif EIGEN_HAS_MM_MALLOC @@ -105,7 +193,67 @@ inline void* ei_aligned_malloc(size_t size) return result; } -/** allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned. +/** \internal Frees memory allocated with ei_aligned_malloc. */ +inline void ei_aligned_free(void *ptr) +{ + #if !EIGEN_ALIGN + std::free(ptr); + #elif EIGEN_MALLOC_ALREADY_ALIGNED + std::free(ptr); + #elif EIGEN_HAS_POSIX_MEMALIGN + std::free(ptr); + #elif EIGEN_HAS_MM_MALLOC + _mm_free(ptr); + #elif defined(_MSC_VER) + _aligned_free(ptr); + #else + ei_handmade_aligned_free(ptr); + #endif +} + +/** +* \internal +* \brief Reallocates an aligned block of memory. +* \throws std::bad_alloc if EIGEN_EXCEPTIONS are defined. +**/ +inline void* ei_aligned_realloc(void *ptr, size_t new_size, size_t old_size) +{ + (void)old_size; // Suppress 'unused variable' warning. Seen in boost tee. + + void *result; +#if !EIGEN_ALIGN + result = std::realloc(ptr,new_size); +#elif EIGEN_MALLOC_ALREADY_ALIGNED + result = std::realloc(ptr,new_size); +#elif EIGEN_HAS_POSIX_MEMALIGN + result = ei_generic_aligned_realloc(ptr,new_size,old_size); +#elif EIGEN_HAS_MM_MALLOC + // The defined(_mm_free) is just here to verify that this MSVC version + // implements _mm_malloc/_mm_free based on the corresponding _aligned_ + // functions. This may not always be the case and we just try to be safe. + #if defined(_MSC_VER) && defined(_mm_free) + result = _aligned_realloc(ptr,new_size,16); + #else + result = ei_generic_aligned_realloc(ptr,new_size,old_size); + #endif +#elif defined(_MSC_VER) + result = _aligned_realloc(ptr,new_size,16); +#else + result = ei_handmade_aligned_realloc(ptr,new_size,old_size); +#endif + +#ifdef EIGEN_EXCEPTIONS + if (result==0 && new_size!=0) + throw std::bad_alloc(); +#endif + return result; +} + +/***************************************************************************** +*** Implementation of conditionally aligned functions *** +*****************************************************************************/ + +/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned. * On allocation error, the returned pointer is null, and if exceptions are enabled then a std::bad_alloc is thrown. */ template inline void* ei_conditional_aligned_malloc(size_t size) @@ -126,7 +274,32 @@ template<> inline void* ei_conditional_aligned_malloc(size_t size) return result; } -/** \internal construct the elements of an array. +/** \internal Frees memory allocated with ei_conditional_aligned_malloc */ +template inline void ei_conditional_aligned_free(void *ptr) +{ + ei_aligned_free(ptr); +} + +template<> inline void ei_conditional_aligned_free(void *ptr) +{ + std::free(ptr); +} + +template inline void* ei_conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size) +{ + return ei_aligned_realloc(ptr, new_size, old_size); +} + +template<> inline void* ei_conditional_aligned_realloc(void* ptr, size_t new_size, size_t) +{ + return std::realloc(ptr, new_size); +} + +/***************************************************************************** +*** Construction/destruction of array elements *** +*****************************************************************************/ + +/** \internal Constructs the elements of an array. * The \a size parameter tells on how many objects to call the constructor of T. */ template inline T* ei_construct_elements_of_array(T *ptr, size_t size) @@ -135,7 +308,20 @@ template inline T* ei_construct_elements_of_array(T *ptr, size_t siz return ptr; } -/** allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment. +/** \internal Destructs the elements of an array. + * The \a size parameters tells on how many objects to call the destructor of T. + */ +template inline void ei_destruct_elements_of_array(T *ptr, size_t size) +{ + // always destruct an array starting from the end. + while(size) ptr[--size].~T(); +} + +/***************************************************************************** +*** Implementation of aligned new/delete-like functions *** +*****************************************************************************/ + +/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment. * On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown. * The default constructor of T is called. */ @@ -151,47 +337,7 @@ template inline T* ei_conditional_aligned_new(size_t siz return ei_construct_elements_of_array(result, size); } -/** \internal free memory allocated with ei_aligned_malloc - */ -inline void ei_aligned_free(void *ptr) -{ - #if !EIGEN_ALIGN - free(ptr); - #elif EIGEN_MALLOC_ALREADY_ALIGNED - free(ptr); - #elif EIGEN_HAS_POSIX_MEMALIGN - free(ptr); - #elif EIGEN_HAS_MM_MALLOC - _mm_free(ptr); - #elif defined(_MSC_VER) - _aligned_free(ptr); - #else - ei_handmade_aligned_free(ptr); - #endif -} - -/** \internal free memory allocated with ei_conditional_aligned_malloc - */ -template inline void ei_conditional_aligned_free(void *ptr) -{ - ei_aligned_free(ptr); -} - -template<> inline void ei_conditional_aligned_free(void *ptr) -{ - std::free(ptr); -} - -/** \internal destruct the elements of an array. - * The \a size parameters tells on how many objects to call the destructor of T. - */ -template inline void ei_destruct_elements_of_array(T *ptr, size_t size) -{ - // always destruct an array starting from the end. - while(size) ptr[--size].~T(); -} - -/** \internal delete objects constructed with ei_aligned_new +/** \internal Deletes objects constructed with ei_aligned_new * The \a size parameters tells on how many objects to call the destructor of T. */ template inline void ei_aligned_delete(T *ptr, size_t size) @@ -200,7 +346,7 @@ template inline void ei_aligned_delete(T *ptr, size_t size) ei_aligned_free(ptr); } -/** \internal delete objects constructed with ei_conditional_aligned_new +/** \internal Deletes objects constructed with ei_conditional_aligned_new * The \a size parameters tells on how many objects to call the destructor of T. */ template inline void ei_conditional_aligned_delete(T *ptr, size_t size) @@ -209,7 +355,17 @@ template inline void ei_conditional_aligned_delete(T *pt ei_conditional_aligned_free(ptr); } -/** \internal \returns the index of the first element of the array that is well aligned for vectorization. +template inline T* ei_conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size) +{ + T *result = reinterpret_cast(ei_conditional_aligned_realloc(reinterpret_cast(pts), sizeof(T)*new_size, sizeof(T)*old_size)); + if (new_size > old_size) + ei_construct_elements_of_array(result+old_size, new_size-old_size); + return result; +} + +/****************************************************************************/ + +/** \internal Returns the index of the first element of the array that is well aligned for vectorization. * * \param array the address of the start of the array * \param size the size of the array @@ -236,7 +392,7 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size) if(PacketSize==1) { // Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements - // of the array have the same aligment. + // of the array have the same alignment. return 0; } else if(size_t(array) & (sizeof(Scalar)-1)) @@ -252,19 +408,23 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size) } } +/***************************************************************************** +*** Implementation of runtime stack allocation (falling back to malloc) *** +*****************************************************************************/ + /** \internal - * ei_aligned_stack_alloc(SIZE) allocates an aligned buffer of SIZE bytes - * on the stack if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and - * if stack allocation is supported by the platform (currently, this is linux only). - * Otherwise the memory is allocated on the heap. - * Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,SIZE). + * Allocates an aligned buffer of SIZE bytes on the stack if SIZE is smaller than + * EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform + * (currently, this is Linux only). Otherwise the memory is allocated on the heap. + * Data allocated with ei_aligned_stack_alloc \b must be freed by calling + * ei_aligned_stack_free(PTR,SIZE). * \code * float * data = ei_aligned_stack_alloc(float,array.size()); * // ... * ei_aligned_stack_free(data,float,array.size()); * \endcode */ -#ifdef __linux__ +#if (defined __linux__) && !(defined __ARM_NEON__) #define ei_aligned_stack_alloc(SIZE) (SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) \ ? alloca(SIZE) \ : ei_aligned_malloc(SIZE) @@ -279,6 +439,10 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size) ei_aligned_stack_free(PTR,sizeof(TYPE)*SIZE);} while(0) +/***************************************************************************** +*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] *** +*****************************************************************************/ + #if EIGEN_ALIGN #ifdef EIGEN_EXCEPTIONS #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ @@ -322,10 +486,11 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size) #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0)) +/****************************************************************************/ /** \class aligned_allocator * -* \brief stl compatible allocator to use with with 16 byte aligned types +* \brief STL compatible allocator to use with with 16 byte aligned types * * Example: * \code diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 619e7664d..5252b28c5 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008 Benoit Jacob // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -46,7 +46,7 @@ // if native static_assert is enabled, let's use it #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG); - #else // CXX0X + #else // not CXX0X template struct ei_static_assert {}; @@ -81,7 +81,8 @@ BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES, + YOU_ALREADY_SPECIFIED_THIS_STRIDE }; }; diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index f1ed4ed9f..eff055b04 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -50,7 +50,7 @@ template class ei_int_if_dynamic { public: EIGEN_EMPTY_STRUCT_CTOR(ei_int_if_dynamic) - explicit ei_int_if_dynamic(int) {} + explicit ei_int_if_dynamic(int v) { EIGEN_ONLY_USED_FOR_DEBUG(v); ei_assert(v == Value); } static int value() { return Value; } void setValue(int) {} }; @@ -58,7 +58,7 @@ template class ei_int_if_dynamic template<> class ei_int_if_dynamic { int m_value; - ei_int_if_dynamic() {} + ei_int_if_dynamic() { ei_assert(false); } public: explicit ei_int_if_dynamic(int value) : m_value(value) {} int value() const { return m_value; } @@ -87,12 +87,17 @@ class ei_compute_matrix_flags { enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0, - inner_max_size = MaxCols==1 ? MaxRows - : MaxRows==1 ? MaxCols - : row_major_bit ? MaxCols : MaxRows, - is_big = inner_max_size == Dynamic, - is_packet_size_multiple = MaxRows==Dynamic || MaxCols==Dynamic || ((MaxCols*MaxRows) % ei_packet_traits::size) == 0, - aligned_bit = (((Options&DontAlign)==0) && (is_big || is_packet_size_multiple)) ? AlignedBit : 0, + is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, +#if !defined(__ARM_NEON__) + is_fixed_size_aligned + = (!is_dynamic_size_storage) && (((MaxCols*MaxRows) % ei_packet_traits::size) == 0), +#else +// FIXME!!! This is a hack because ARM gcc does not honour __attribute__((aligned(16))) properly + is_fixed_size_aligned = 0, +#endif + aligned_bit = (((Options&DontAlign)==0) + && (is_dynamic_size_storage || is_fixed_size_aligned)) + ? AlignedBit : 0, packet_access_bit = ei_packet_traits::size > 1 && aligned_bit ? PacketAccessBit : 0 }; @@ -214,7 +219,7 @@ struct ei_is_reference }; /** -* The reference selector for template expressions. The idea is that we don't +* \internal The reference selector for template expressions. The idea is that we don't * need to use references for expressions since they are light weight proxy * objects which should generate no copying overhead. **/ diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h index 5deac3247..c45151e82 100644 --- a/Eigen/src/Eigenvalues/ComplexSchur.h +++ b/Eigen/src/Eigenvalues/ComplexSchur.h @@ -154,6 +154,14 @@ void ComplexSchur::compute(const MatrixType& matrix, bool skipU) m_matT = hess.matrixH(); if(!skipU) m_matU = hess.matrixQ(); + + // Reduce the Hessenberg matrix m_matT to triangular form by QR iteration. + + // The matrix m_matT is divided in three parts. + // Rows 0,...,il-1 are decoupled from the rest because m_matT(il,il-1) is zero. + // Rows il,...,iu is the part we are working on (the active submatrix). + // Rows iu+1,...,end are already brought in triangular form. + int iu = m_matT.cols() - 1; int il; RealScalar d,sd,sf; @@ -164,7 +172,7 @@ void ComplexSchur::compute(const MatrixType& matrix, bool skipU) int iter = 0; while(true) { - //locate the range in which to iterate + // find iu, the bottom row of the active submatrix while(iu > 0) { d = ei_norm1(m_matT.coeff(iu,iu)) + ei_norm1(m_matT.coeff(iu-1,iu-1)); @@ -183,10 +191,11 @@ void ComplexSchur::compute(const MatrixType& matrix, bool skipU) if(iter >= 30) { // FIXME : what to do when iter==MAXITER ?? - std::cerr << "MAXITER" << std::endl; + //std::cerr << "MAXITER" << std::endl; return; } + // find il, the top row of the active submatrix il = iu-1; while(il > 0) { @@ -202,16 +211,19 @@ void ComplexSchur::compute(const MatrixType& matrix, bool skipU) if( il != 0 ) m_matT.coeffRef(il,il-1) = Complex(0); - // compute the shift (the normalization by sf is to avoid under/overflow) + // compute the shift kappa as one of the eigenvalues of the 2x2 + // diagonal block on the bottom of the active submatrix + Matrix t = m_matT.template block<2,2>(iu-1,iu-1); sf = t.cwiseAbs().sum(); - t /= sf; + t /= sf; // the normalization by sf is to avoid under/overflow - c = t.determinant(); - b = t.diagonal().sum(); - - disc = ei_sqrt(b*b - RealScalar(4)*c); + b = t.coeff(0,1) * t.coeff(1,0); + c = t.coeff(0,0) - t.coeff(1,1); + disc = ei_sqrt(c*c + RealScalar(4)*b); + c = t.coeff(0,0) * t.coeff(1,1) - b; + b = t.coeff(0,0) + t.coeff(1,1); r1 = (b+disc)/RealScalar(2); r2 = (b-disc)/RealScalar(2); @@ -224,6 +236,12 @@ void ComplexSchur::compute(const MatrixType& matrix, bool skipU) kappa = sf * r1; else kappa = sf * r2; + + if (iter == 10 || iter == 20) + { + // exceptional shift, taken from http://www.netlib.org/eispack/comqr.f + kappa = ei_abs(ei_real(m_matT.coeff(iu,iu-1))) + ei_abs(ei_real(m_matT.coeff(iu-1,iu-2))); + } // perform the QR step using Givens rotations PlanarRotation rot; @@ -246,18 +264,6 @@ void ComplexSchur::compute(const MatrixType& matrix, bool skipU) } } - // FIXME : is it necessary ? - /* - for(int i=0 ; i operator* (const Translation& t) const; /** Concatenates a uniform scaling and an affine transformation */ - template - inline Transform operator* (const Transform& t) const; + template + inline Transform operator* (const Transform& t) const; /** Concatenates a uniform scaling and a linear transformation matrix */ // TODO returns an expression @@ -156,4 +156,27 @@ typedef DiagonalMatrix AlignedScaling3f; typedef DiagonalMatrix AlignedScaling3d; //@} +template +template +inline Transform +UniformScaling::operator* (const Translation& t) const +{ + Transform res; + res.matrix().setZero(); + res.linear().diagonal().fill(factor()); + res.translation() = factor() * t.vector(); + res(Dim,Dim) = Scalar(1); + return res; +} + +template +template +inline Transform +UniformScaling::operator* (const Transform& t) const +{ + Transform res = t; + res.prescale(factor()); + return res; +} + #endif // EIGEN_SCALING_H diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h index fb6577f08..d0b70a31c 100644 --- a/Eigen/src/LU/Determinant.h +++ b/Eigen/src/LU/Determinant.h @@ -69,7 +69,7 @@ template struct ei_determinant_impl template struct ei_determinant_impl { - static typename ei_traits::Scalar run(const Derived& m) + static inline typename ei_traits::Scalar run(const Derived& m) { return ei_bruteforce_det3_helper(m,0,1,2) - ei_bruteforce_det3_helper(m,1,0,2) @@ -100,8 +100,7 @@ inline typename ei_traits::Scalar MatrixBase::determinant() co { assert(rows() == cols()); typedef typename ei_nested::type Nested; - Nested nested(derived()); - return ei_determinant_impl::type>::run(nested); + return ei_determinant_impl::type>::run(derived()); } #endif // EIGEN_DETERMINANT_H diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 9afc448cc..dea6ec41c 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -119,7 +119,7 @@ template class FullPivLU * diagonal coefficient of U. */ RealScalar maxPivot() const { return m_maxpivot; } - + /** \returns the permutation matrix P * * \sa permutationQ() @@ -361,6 +361,8 @@ template class FullPivLU (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols())); } + MatrixType reconstructedMatrix() const; + inline int rows() const { return m_lu.rows(); } inline int cols() const { return m_lu.cols(); } @@ -404,6 +406,7 @@ FullPivLU& FullPivLU::compute(const MatrixType& matrix) m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) m_maxpivot = RealScalar(0); + RealScalar cutoff(0); for(int k = 0; k < size; ++k) { @@ -418,8 +421,14 @@ FullPivLU& FullPivLU::compute(const MatrixType& matrix) row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner, col_of_biggest_in_corner += k; // need to add k to them. - // if the pivot (hence the corner) is exactly zero, terminate to avoid generating nan/inf values - if(biggest_in_corner == RealScalar(0)) + // when k==0, biggest_in_corner is the biggest coeff absolute value in the original matrix + if(k == 0) cutoff = biggest_in_corner * NumTraits::epsilon(); + + // if the pivot (hence the corner) is "zero", terminate to avoid generating nan/inf values. + // Notice that using an exact comparison (biggest_in_corner==0) here, as Golub-van Loan do in + // their pseudo-code, results in numerical instability! The cutoff here has been validated + // by running the unit test 'lu' with many repetitions. + if(biggest_in_corner < cutoff) { // before exiting, make sure to initialize the still uninitialized transpositions // in a sane state without destroying what we already have. @@ -480,6 +489,31 @@ typename ei_traits::Scalar FullPivLU::determinant() cons return Scalar(m_det_pq) * Scalar(m_lu.diagonal().prod()); } +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: P^{-1} L U Q^{-1}. + * This function is provided for debug purpose. */ +template +MatrixType FullPivLU::reconstructedMatrix() const +{ + ei_assert(m_isInitialized && "LU is not initialized."); + const int smalldim = std::min(m_lu.rows(), m_lu.cols()); + // LU + MatrixType res(m_lu.rows(),m_lu.cols()); + // FIXME the .toDenseMatrix() should not be needed... + res = m_lu.corner(TopLeft,m_lu.rows(),smalldim) + .template triangularView().toDenseMatrix() + * m_lu.corner(TopLeft,smalldim,m_lu.cols()) + .template triangularView().toDenseMatrix(); + + // P^{-1}(LU) + res = m_p.inverse() * res; + + // (P^{-1}LU)Q^{-1} + res = res * m_q.inverse(); + + return res; +} + /********* Implementation of kernel() **************************************************/ template diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/Inverse.h index e20da70d6..116a614e1 100644 --- a/Eigen/src/LU/Inverse.h +++ b/Eigen/src/LU/Inverse.h @@ -123,7 +123,7 @@ struct ei_compute_inverse_and_det_with_check ****************************/ template -void ei_compute_inverse_size3_helper( +inline void ei_compute_inverse_size3_helper( const MatrixType& matrix, const typename ResultType::Scalar& invdet, const Matrix& cofactors_col0, diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index ed2354d78..df36cb04d 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -165,6 +165,8 @@ template class PartialPivLU */ typename ei_traits::Scalar determinant() const; + MatrixType reconstructedMatrix() const; + inline int rows() const { return m_lu.rows(); } inline int cols() const { return m_lu.cols(); } @@ -194,7 +196,7 @@ PartialPivLU::PartialPivLU(const MatrixType& matrix) compute(matrix); } -/** This is the blocked version of ei_fullpivlu_unblocked() */ +/** \internal This is the blocked version of ei_fullpivlu_unblocked() */ template struct ei_partial_lu_impl { @@ -368,7 +370,7 @@ void ei_partial_lu_inplace(MatrixType& lu, IntVector& row_transpositions, int& n ei_partial_lu_impl - ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.stride(), &row_transpositions.coeffRef(0), nb_transpositions); + ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions); } template @@ -400,6 +402,23 @@ typename ei_traits::Scalar PartialPivLU::determinant() c return Scalar(m_det_p) * m_lu.diagonal().prod(); } +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: P^{-1} L U. + * This function is provided for debug purpose. */ +template +MatrixType PartialPivLU::reconstructedMatrix() const +{ + ei_assert(m_isInitialized && "LU is not initialized."); + // LU + MatrixType res = m_lu.template triangularView().toDenseMatrix() + * m_lu.template triangularView(); + + // P^{-1}(LU) + res = m_p.inverse() * res; + + return res; +} + /***** Implementation of solve() *****************************************************/ template diff --git a/Eigen/src/Sparse/CholmodSupport.h b/Eigen/src/Sparse/CholmodSupport.h index fd33b1507..fbd035ce4 100644 --- a/Eigen/src/Sparse/CholmodSupport.h +++ b/Eigen/src/Sparse/CholmodSupport.h @@ -99,7 +99,7 @@ cholmod_dense ei_cholmod_map_eigen_to_dense(MatrixBase& mat) res.nrow = mat.rows(); res.ncol = mat.cols(); res.nzmax = res.nrow * res.ncol; - res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().stride(); + res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride(); res.x = mat.derived().data(); res.z = 0; @@ -233,7 +233,7 @@ bool SparseLLT::solveInPlace(MatrixBase &b) const cholmod_dense* x = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &cdb, &m_cholmod); if(!x) { - std::cerr << "Eigen: cholmod_solve failed\n"; + //std::cerr << "Eigen: cholmod_solve failed\n"; return false; } b = Matrix::Map(reinterpret_cast(x->x),b.rows()); diff --git a/Eigen/src/Sparse/DynamicSparseMatrix.h b/Eigen/src/Sparse/DynamicSparseMatrix.h index 2594ffebc..d73dce229 100644 --- a/Eigen/src/Sparse/DynamicSparseMatrix.h +++ b/Eigen/src/Sparse/DynamicSparseMatrix.h @@ -236,7 +236,7 @@ class DynamicSparseMatrix { // remove all coefficients with innerCoord>=innerSize // TODO - std::cerr << "not implemented yet\n"; + //std::cerr << "not implemented yet\n"; exit(2); } if (m_data.size() != outerSize) diff --git a/Eigen/src/Sparse/SuperLUSupport.h b/Eigen/src/Sparse/SuperLUSupport.h index 1a765c75b..18a967539 100644 --- a/Eigen/src/Sparse/SuperLUSupport.h +++ b/Eigen/src/Sparse/SuperLUSupport.h @@ -161,7 +161,7 @@ struct SluMatrix : SuperMatrix res.nrow = mat.rows(); res.ncol = mat.cols(); - res.storage.lda = MatrixType::IsVectorAtCompileTime ? mat.size() : mat.stride(); + res.storage.lda = MatrixType::IsVectorAtCompileTime ? mat.size() : mat.outerStride(); res.storage.values = mat.data(); return res; } @@ -217,7 +217,7 @@ struct SluMatrixMapHelper > res.nrow = mat.rows(); res.ncol = mat.cols(); - res.storage.lda = mat.stride(); + res.storage.lda = mat.outerStride(); res.storage.values = mat.data(); } }; @@ -397,7 +397,7 @@ void SparseLU::compute(const MatrixType& a) case MinimumDegree_ATA : m_sluOptions.ColPerm = MMD_ATA; break; case ColApproxMinimumDegree : m_sluOptions.ColPerm = COLAMD; break; default: - std::cerr << "Eigen: ordering method \"" << Base::orderingMethod() << "\" not supported by the SuperLU backend\n"; + //std::cerr << "Eigen: ordering method \"" << Base::orderingMethod() << "\" not supported by the SuperLU backend\n"; m_sluOptions.ColPerm = NATURAL; }; @@ -448,7 +448,7 @@ void SparseLU::compute(const MatrixType& a) &recip_pivot_gross, &rcond, &m_sluStat, &info, Scalar()); #else - std::cerr << "Incomplete factorization is only available in SuperLU v4\n"; + //std::cerr << "Incomplete factorization is only available in SuperLU v4\n"; Base::m_succeeded = false; return; #endif @@ -486,7 +486,7 @@ bool SparseLU::solve(const MatrixBase &b, case SvTranspose : m_sluOptions.Trans = TRANS; break; case SvAdjoint : m_sluOptions.Trans = CONJ; break; default: - std::cerr << "Eigen: transposition option \"" << transposed << "\" not supported by the SuperLU backend\n"; + //std::cerr << "Eigen: transposition option \"" << transposed << "\" not supported by the SuperLU backend\n"; m_sluOptions.Trans = NOTRANS; } @@ -513,7 +513,7 @@ bool SparseLU::solve(const MatrixBase &b, &recip_pivot_gross, &rcond, &m_sluStat, &info, Scalar()); #else - std::cerr << "Incomplete factorization is only available in SuperLU v4\n"; + //std::cerr << "Incomplete factorization is only available in SuperLU v4\n"; return false; #endif } diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h index e49afa07f..0a0a5e154 100644 --- a/bench/BenchTimer.h +++ b/bench/BenchTimer.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008-2010 Gael Guennebaud // Copyright (C) 2009 Benoit Jacob // // Eigen is free software; you can redistribute it and/or @@ -27,18 +27,20 @@ #define EIGEN_BENCH_TIMERR_H #if defined(_WIN32) || defined(__CYGWIN__) -#define NOMINMAX -#define WIN32_LEAN_AND_MEAN -#include +# ifndef NOMINMAX +# define NOMINMAX +# define EIGEN_BT_UNDEF_NOMINMAX +# endif +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN +# endif +# include #else -#include -#include -#include +# include #endif -#include -#include -#include +#include namespace Eigen { @@ -126,14 +128,13 @@ public: inline double getRealTime() { #ifdef WIN32 - SYSTEMTIME st; - GetSystemTime(&st); - return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds; + SYSTEMTIME st; + GetSystemTime(&st); + return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds; #else - struct timeval tv; - struct timezone tz; - gettimeofday(&tv, &tz); - return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec; + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec); #endif } @@ -161,4 +162,15 @@ protected: } +// clean #defined tokens +#ifdef EIGEN_BT_UNDEF_NOMINMAX +# undef EIGEN_BT_UNDEF_NOMINMAX +# undef NOMINMAX +#endif + +#ifdef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN +# undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN +# undef WIN32_LEAN_AND_MEAN +#endif + #endif // EIGEN_BENCH_TIMERR_H diff --git a/bench/bench_unrolling b/bench/bench_unrolling index bf01cce7d..826443845 100755 --- a/bench/bench_unrolling +++ b/bench/bench_unrolling @@ -2,10 +2,11 @@ # gcc : CXX="g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000" # icc : CXX="icpc -fast -no-inline-max-size -fno-exceptions" +CXX=${CXX-g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000} # default value for ((i=1; i<16; ++i)); do echo "Matrix size: $i x $i :" - $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null + $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null echo " " done diff --git a/bench/benchmark_suite b/bench/benchmark_suite index a8fc6dced..3f21d3661 100755 --- a/bench/benchmark_suite +++ b/bench/benchmark_suite @@ -1,4 +1,5 @@ #!/bin/bash +CXX=${CXX-g++} # default value unless caller has defined CXX echo "Fixed size 3x3, column-major, -DNDEBUG" $CXX -O3 -I .. -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null echo "Fixed size 3x3, column-major, with asserts" diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index a6c330a5c..e71076f9d 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -2,9 +2,10 @@ project(EigenBlas) add_custom_target(blas) -set(EigenBlas_SRCS single.cpp double.cpp complex_single.cpp complex_double.cpp) +set(EigenBlas_SRCS single.cpp double.cpp complex_single.cpp complex_double.cpp xerbla.cpp) -add_library(eigen_blas SHARED ${EigenBlas_SRCS}) +add_library(eigen_blas ${EigenBlas_SRCS}) +# add_library(eigen_blas SHARED ${EigenBlas_SRCS}) add_dependencies(blas eigen_blas) install(TARGETS eigen_blas diff --git a/blas/common.h b/blas/common.h index e7bfda570..8b9c6ff09 100644 --- a/blas/common.h +++ b/blas/common.h @@ -25,6 +25,8 @@ #ifndef EIGEN_BLAS_COMMON_H #define EIGEN_BLAS_COMMON_H +#include + #ifndef SCALAR #error the token SCALAR must be defined to compile this file #endif @@ -34,13 +36,12 @@ extern "C" { #endif -#include +#include "../bench/btl/libs/C_BLAS/blas.h" #ifdef __cplusplus } #endif - #define NOTR 0 #define TR 1 #define ADJ 2 @@ -75,27 +76,6 @@ extern "C" #include using namespace Eigen; -template -Block >, Dynamic, Dynamic> -matrix(T* data, int rows, int cols, int stride) -{ - return Map >(data, stride, cols).block(0,0,rows,cols); -} - -template -Block >, Dynamic, 1> -vector(T* data, int size, int incr) -{ - return Map >(data, size, incr).col(0); -} - -template -Map > -vector(T* data, int size) -{ - return Map >(data, size); -} - typedef SCALAR Scalar; typedef NumTraits::Real RealScalar; typedef std::complex Complex; @@ -106,10 +86,29 @@ enum Conj = IsComplex }; -typedef Block >, Dynamic, Dynamic> MatrixType; -typedef Block >, Dynamic, 1> StridedVectorType; +typedef Map, 0, OuterStride > MatrixType; +typedef Map, 0, InnerStride > StridedVectorType; typedef Map > CompactVectorType; +template +Map, 0, OuterStride > +matrix(T* data, int rows, int cols, int stride) +{ + return Map, 0, OuterStride >(data, rows, cols, OuterStride(stride)); +} + +template +Map, 0, InnerStride > vector(T* data, int size, int incr) +{ + return Map, 0, InnerStride >(data, size, InnerStride(incr)); +} + +template +Map > vector(T* data, int size) +{ + return Map >(data, size); +} + #define EIGEN_BLAS_FUNC(X) EIGEN_CAT(SCALAR_SUFFIX,X##_) #endif // EIGEN_BLAS_COMMON_H diff --git a/blas/complex_double.cpp b/blas/complex_double.cpp index f51ccb25b..fd6db03b5 100644 --- a/blas/complex_double.cpp +++ b/blas/complex_double.cpp @@ -23,7 +23,8 @@ // Eigen. If not, see . #define SCALAR std::complex -#define SCALAR_SUFFIX c +#define SCALAR_SUFFIX z +#define REAL_SCALAR_SUFFIX d #define ISCOMPLEX 1 #include "level1_impl.h" diff --git a/blas/complex_single.cpp b/blas/complex_single.cpp index b6617e7b9..54c7ca1e5 100644 --- a/blas/complex_single.cpp +++ b/blas/complex_single.cpp @@ -23,7 +23,8 @@ // Eigen. If not, see . #define SCALAR std::complex -#define SCALAR_SUFFIX z +#define SCALAR_SUFFIX c +#define REAL_SCALAR_SUFFIX s #define ISCOMPLEX 1 #include "level1_impl.h" diff --git a/blas/level1_impl.h b/blas/level1_impl.h index c508626db..9f3e4d166 100644 --- a/blas/level1_impl.h +++ b/blas/level1_impl.h @@ -30,52 +30,111 @@ int EIGEN_BLAS_FUNC(axpy)(int *n, RealScalar *palpha, RealScalar *px, int *incx, Scalar* y = reinterpret_cast(py); Scalar alpha = *reinterpret_cast(palpha); - if(*incx==1 && *incy==1) - vector(y,*n) += alpha * vector(x,*n); - else - vector(y,*n,*incy) += alpha * vector(x,*n,*incx); +// std::cerr << "axpy " << *n << " " << alpha << " " << *incx << " " << *incy << "\n"; - return 1; + if(*incx==1 && *incy==1) vector(y,*n) += alpha * vector(x,*n); + else if(*incx>0 && *incy>0) vector(y,*n,*incy) += alpha * vector(x,*n,*incx); + else if(*incx>0 && *incy<0) vector(y,*n,-*incy).reverse() += alpha * vector(x,*n,*incx); + else if(*incx<0 && *incy>0) vector(y,*n,*incy) += alpha * vector(x,*n,-*incx).reverse(); + else if(*incx<0 && *incy<0) vector(y,*n,-*incy).reverse() += alpha * vector(x,*n,-*incx).reverse(); + + return 0; } +#if !ISCOMPLEX // computes the sum of magnitudes of all vector elements or, for a complex vector x, the sum // res = |Rex1| + |Imx1| + |Rex2| + |Imx2| + ... + |Rexn| + |Imxn|, where x is a vector of order n RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx) { - int size = IsComplex ? 2* *n : *n; +// std::cerr << "_asum " << *n << " " << *incx << "\n"; - if(*incx==1) - return vector(px,size).cwise().abs().sum(); - else - return vector(px,size,*incx).cwise().abs().sum(); + Scalar* x = reinterpret_cast(px); - return 1; + if(*n<=0) return 0; + + if(*incx==1) return vector(x,*n).cwiseAbs().sum(); + else return vector(x,*n,std::abs(*incx)).cwiseAbs().sum(); } +#else + +struct ei_scalar_norm1_op { + typedef RealScalar result_type; + EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_norm1_op) + inline RealScalar operator() (const Scalar& a) const { return ei_norm1(a); } +}; +namespace Eigen { +template<> struct ei_functor_traits +{ + enum { Cost = 3 * NumTraits::AddCost, PacketAccess = 0 }; +}; +} + +RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),asum_)(int *n, RealScalar *px, int *incx) +{ +// std::cerr << "__asum " << *n << " " << *incx << "\n"; + + Complex* x = reinterpret_cast(px); + + if(*n<=0) return 0; + + if(*incx==1) return vector(x,*n).unaryExpr().sum(); + else return vector(x,*n,std::abs(*incx)).unaryExpr().sum(); +} +#endif int EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { - int size = IsComplex ? 2* *n : *n; +// std::cerr << "_copy " << *n << " " << *incx << " " << *incy << "\n"; - if(*incx==1 && *incy==1) - vector(py,size) = vector(px,size); - else - vector(py,size,*incy) = vector(px,size,*incx); + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); - return 1; + if(*incx==1 && *incy==1) vector(y,*n) = vector(x,*n); + else if(*incx>0 && *incy>0) vector(y,*n,*incy) = vector(x,*n,*incx); + else if(*incx>0 && *incy<0) vector(y,*n,-*incy).reverse() = vector(x,*n,*incx); + else if(*incx<0 && *incy>0) vector(y,*n,*incy) = vector(x,*n,-*incx).reverse(); + else if(*incx<0 && *incy<0) vector(y,*n,-*incy).reverse() = vector(x,*n,-*incx).reverse(); + + return 0; } // computes a vector-vector dot product. Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { +// std::cerr << "_dot " << *n << " " << *incx << " " << *incy << "\n"; + + if(*n<=0) + return 0; + Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - if(*incx==1 && *incy==1) - return (vector(x,*n).cwise()*vector(y,*n)).sum(); - - return (vector(x,*n,*incx).cwise()*vector(y,*n,*incy)).sum(); + if(*incx==1 && *incy==1) return (vector(x,*n).cwiseProduct(vector(y,*n))).sum(); + else if(*incx>0 && *incy>0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum(); + else if(*incx<0 && *incy>0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum(); + else if(*incx>0 && *incy<0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); + else if(*incx<0 && *incy<0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); + else return 0; } +int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amax_)(int *n, RealScalar *px, int *incx) +{ +// std::cerr << "i_amax " << *n << " " << *incx << "\n"; + + Scalar* x = reinterpret_cast(px); + + if(*n<=0) + return 0; + + int ret; + + if(*incx==1) vector(x,*n).cwiseAbs().maxCoeff(&ret); + else vector(x,*n,std::abs(*incx)).cwiseAbs().maxCoeff(&ret); + + return ret+1; +} + + /* // computes a vector-vector dot product with extended precision. @@ -96,53 +155,100 @@ Scalar EIGEN_BLAS_FUNC(sdot)(int *n, RealScalar *px, int *incx, RealScalar *py, #if ISCOMPLEX // computes a dot product of a conjugated vector with another vector. -Scalar EIGEN_BLAS_FUNC(dotc)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +void EIGEN_BLAS_FUNC(dotc)(RealScalar* dot, int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { + + std::cerr << "Eigen BLAS: _dotc is not implemented yet\n"; + + return; + + // TODO: find how to return a complex to fortran + +// std::cerr << "_dotc " << *n << " " << *incx << " " << *incy << "\n"; + Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); if(*incx==1 && *incy==1) - return vector(x,*n).dot(vector(y,*n)); - - return vector(x,*n,*incx).dot(vector(y,*n,*incy)); + *reinterpret_cast(dot) = vector(x,*n).dot(vector(y,*n)); + else + *reinterpret_cast(dot) = vector(x,*n,*incx).dot(vector(y,*n,*incy)); } // computes a vector-vector dot product without complex conjugation. -Scalar EIGEN_BLAS_FUNC(dotu)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) +void EIGEN_BLAS_FUNC(dotu)(RealScalar* dot, int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { + std::cerr << "Eigen BLAS: _dotu is not implemented yet\n"; + + return; + + // TODO: find how to return a complex to fortran + +// std::cerr << "_dotu " << *n << " " << *incx << " " << *incy << "\n"; + Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); if(*incx==1 && *incy==1) - return (vector(x,*n).cwise()*vector(y,*n)).sum(); - - return (vector(x,*n,*incx).cwise()*vector(y,*n,*incy)).sum(); + *reinterpret_cast(dot) = (vector(x,*n).cwiseProduct(vector(y,*n))).sum(); + else + *reinterpret_cast(dot) = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum(); } #endif // ISCOMPLEX +#if !ISCOMPLEX // computes the Euclidean norm of a vector. Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx) { +// std::cerr << "_nrm2 " << *n << " " << *incx << "\n"; Scalar* x = reinterpret_cast(px); + if(*n<=0) + return 0; + + if(*incx==1) return vector(x,*n).norm(); + else return vector(x,*n,std::abs(*incx)).norm(); +} +#else +RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),nrm2_)(int *n, RealScalar *px, int *incx) +{ +// std::cerr << "__nrm2 " << *n << " " << *incx << "\n"; + Scalar* x = reinterpret_cast(px); + + if(*n<=0) + return 0; + if(*incx==1) return vector(x,*n).norm(); return vector(x,*n,*incx).norm(); } +#endif int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) { +// std::cerr << "_rot " << *n << " " << *incx << " " << *incy << "\n"; Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); Scalar c = *reinterpret_cast(pc); Scalar s = *reinterpret_cast(ps); - StridedVectorType vx(vector(x,*n,*incx)); - StridedVectorType vy(vector(y,*n,*incy)); - ei_apply_rotation_in_the_plane(vx, vy, PlanarRotation(c,s)); - return 1; + if(*n<=0) + return 0; + + StridedVectorType vx(vector(x,*n,std::abs(*incx))); + StridedVectorType vy(vector(y,*n,std::abs(*incy))); + + Reverse rvx(vx); + Reverse rvy(vy); + + if(*incx<0 && *incy>0) ei_apply_rotation_in_the_plane(rvx, vy, PlanarRotation(c,s)); + else if(*incx>0 && *incy<0) ei_apply_rotation_in_the_plane(vx, rvy, PlanarRotation(c,s)); + else ei_apply_rotation_in_the_plane(vx, vy, PlanarRotation(c,s)); + + + return 0; } int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps) @@ -157,7 +263,7 @@ int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealSc *c = r.c(); *s = r.s(); - return 1; + return 0; } #if !ISCOMPLEX @@ -183,43 +289,56 @@ int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealS */ #endif // !ISCOMPLEX -int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *px, int *incx, RealScalar *palpha) +int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx) { Scalar* x = reinterpret_cast(px); Scalar alpha = *reinterpret_cast(palpha); - if(*incx==1) - vector(x,*n) *= alpha; +// std::cerr << "_scal " << *n << " " << alpha << " " << *incx << "\n"; - vector(x,*n,*incx) *= alpha; + if(*n<=0) + return 0; - return 1; + if(*incx==1) vector(x,*n) *= alpha; + else vector(x,*n,std::abs(*incx)) *= alpha; + + return 0; } +#if ISCOMPLEX +int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),scal_)(int *n, RealScalar *palpha, RealScalar *px, int *incx) +{ + Scalar* x = reinterpret_cast(px); + RealScalar alpha = *palpha; + +// std::cerr << "__scal " << *n << " " << alpha << " " << *incx << "\n"; + + if(*n<=0) + return 0; + + if(*incx==1) vector(x,*n) *= alpha; + else vector(x,*n,std::abs(*incx)) *= alpha; + + return 0; +} +#endif // ISCOMPLEX + int EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy) { - int size = IsComplex ? 2* *n : *n; +// std::cerr << "_swap " << *n << " " << *incx << " " << *incy << "\n"; - if(*incx==1 && *incy==1) - vector(py,size).swap(vector(px,size)); - else - vector(py,size,*incy).swap(vector(px,size,*incx)); + Scalar* x = reinterpret_cast(px); + Scalar* y = reinterpret_cast(py); + + if(*n<=0) + return 0; + + if(*incx==1 && *incy==1) vector(y,*n).swap(vector(x,*n)); + else if(*incx>0 && *incy>0) vector(y,*n,*incy).swap(vector(x,*n,*incx)); + else if(*incx>0 && *incy<0) vector(y,*n,-*incy).reverse().swap(vector(x,*n,*incx)); + else if(*incx<0 && *incy>0) vector(y,*n,*incy).swap(vector(x,*n,-*incx).reverse()); + else if(*incx<0 && *incy<0) vector(y,*n,-*incy).reverse().swap(vector(x,*n,-*incx).reverse()); return 1; } -#if !ISCOMPLEX - -RealScalar EIGEN_BLAS_FUNC(casum)(int *n, RealScalar *px, int *incx) -{ - Complex* x = reinterpret_cast(px); - - if(*incx==1) - return vector(x,*n).cwise().abs().sum(); - else - return vector(x,*n,*incx).cwise().abs().sum(); - - return 1; -} - -#endif // ISCOMPLEX diff --git a/blas/level2_impl.h b/blas/level2_impl.h index 5691e8a7f..68be9a806 100644 --- a/blas/level2_impl.h +++ b/blas/level2_impl.h @@ -56,9 +56,11 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca return 1; } -/* + int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) { + return 0; + typedef void (*functype)(int, const Scalar *, int, Scalar *, int); functype func[16]; @@ -95,13 +97,14 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar return 0; func[code](*n, a, *lda, b, *incb); - return 1; + return 0; } -*/ -/* + + int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) { + return 0; // TODO typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int); @@ -140,13 +143,21 @@ int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar return 0; func[code](*n, a, *lda, b, *incb, b, *incb); - return 1; + return 0; +} + +// y = alpha*A*x + beta*y +int EIGEN_BLAS_FUNC(ssymv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) +{ + return 0; + + // TODO } -*/ -/* int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *inca, RealScalar *pc, int *ldc) { + return 0; + // TODO typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar); functype func[2]; @@ -174,11 +185,13 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, func[code](*n, a, *inca, c, *ldc, alpha); return 1; } -*/ -/* + + int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *inca, RealScalar *pb, int *incb, RealScalar *pc, int *ldc) { + return 0; + // TODO typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); functype func[2]; @@ -207,7 +220,7 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa func[code](*n, a, *inca, b, *incb, c, *ldc, alpha); return 1; } -*/ + #if ISCOMPLEX diff --git a/blas/level3_impl.h b/blas/level3_impl.h index d44de1b5d..6a0e64392 100644 --- a/blas/level3_impl.h +++ b/blas/level3_impl.h @@ -26,8 +26,9 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) { +// std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n"; typedef void (*functype)(int, int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); - functype func[12]; + static functype func[12]; static bool init = false; if(!init) @@ -52,21 +53,29 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal Scalar alpha = *reinterpret_cast(palpha); Scalar beta = *reinterpret_cast(pbeta); - if(beta!=Scalar(1)) - matrix(c, *m, *n, *ldc) *= beta; - int code = OP(*opa) | (OP(*opb) << 2); - if(code>=12 || func[code]==0) + if(code>=12 || func[code]==0 || (*m<0) || (*n<0) || (*k<0)) + { + int info = 1; + xerbla_("GEMM", &info, 4); return 0; + } + + if(beta!=Scalar(1)) + if(beta==Scalar(0)) + matrix(c, *m, *n, *ldc).setZero(); + else + matrix(c, *m, *n, *ldc) *= beta; func[code](*m, *n, *k, a, *lda, b, *ldb, c, *ldc, alpha); - return 1; + return 0; } int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb) { +// std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n"; typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int); - functype func[32]; + static functype func[32]; static bool init = false; if(!init) @@ -74,38 +83,38 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, for(int k=0; k<32; ++k) func[k] = 0; - func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_triangular_solve_matrix::run); - func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); + func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_triangular_solve_matrix::run); init = true; } @@ -114,14 +123,23 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, Scalar* b = reinterpret_cast(pb); Scalar alpha = *reinterpret_cast(palpha); - // TODO handle alpha - int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4); - if(code>=32 || func[code]==0) + if(code>=32 || func[code]==0 || *m<0 || *n <0) + { + int info=1; + xerbla_("TRSM",&info,4); return 0; + } - func[code](*m, *n, a, *lda, b, *ldb); - return 1; + if(SIDE(*side)==LEFT) + func[code](*m, *n, a, *lda, b, *ldb); + else + func[code](*n, *m, a, *lda, b, *ldb); + + if(alpha!=Scalar(1)) + matrix(b,*m,*n,*ldb) *= alpha; + + return 0; } @@ -129,46 +147,46 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, // b = alpha*b*op(a) for side = 'R'or'r' int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb) { +// std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n"; typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); - functype func[32]; - + static functype func[32]; static bool init = false; if(!init) { for(int k=0; k<32; ++k) func[k] = 0; - func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); + func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (ei_product_triangular_matrix_matrix::run); init = true; } @@ -178,10 +196,21 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, Scalar alpha = *reinterpret_cast(palpha); int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4); - if(code>=32 || func[code]==0) + if(code>=32 || func[code]==0 || *m<0 || *n <0) + { + int info=1; + xerbla_("TRMM",&info,4); return 0; + } - func[code](*m, *n, a, *lda, b, *ldb, b, *ldb, alpha); + // FIXME find a way to avoid this copy + Matrix tmp = matrix(b,*m,*n,*ldb); + matrix(b,*m,*n,*ldb).setZero(); + + if(SIDE(*side)==LEFT) + func[code](*m, *n, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha); + else + func[code](*n, *m, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha); return 1; } @@ -189,41 +218,45 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, // c = alpha*b*a + beta*c for side = 'R'or'r int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) { +// std::cerr << "in symm " << *side << " " << *uplo << " " << *m << "x" << *n << " lda:" << *lda << " ldb:" << *ldb << " ldc:" << *ldc << " alpha:" << *palpha << " beta:" << *pbeta << "\n"; Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); Scalar alpha = *reinterpret_cast(palpha); Scalar beta = *reinterpret_cast(pbeta); + if(*m<0 || *n<0) + { + int info=1; + xerbla_("SYMM",&info,4); + return 0; + } + if(beta!=Scalar(1)) - matrix(c, *m, *n, *ldc) *= beta; + if(beta==Scalar(0)) matrix(c, *m, *n, *ldc).setZero(); + else matrix(c, *m, *n, *ldc) *= beta; if(SIDE(*side)==LEFT) - if(UPLO(*uplo)==UP) - ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); - else if(UPLO(*uplo)==LO) - ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); - else - return 0; + if(UPLO(*uplo)==UP) ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + else if(UPLO(*uplo)==LO) ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + else return 0; else if(SIDE(*side)==RIGHT) - if(UPLO(*uplo)==UP) - ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); - else if(UPLO(*uplo)==LO) - ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); - else - return 0; + if(UPLO(*uplo)==UP) ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + else if(UPLO(*uplo)==LO) ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + else return 0; else return 0; - return 1; + return 0; } // c = alpha*a*a' + beta*c for op = 'N'or'n' // c = alpha*a'*a + beta*c for op = 'T'or't','C'or'c' int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) { +// std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int, Scalar); - functype func[8]; + static functype func[8]; static bool init = false; if(!init) @@ -231,13 +264,13 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp for(int k=0; k<8; ++k) func[k] = 0; - func[NOTR | (UP << 2)] = (ei_selfadjoint_product::run); - func[TR | (UP << 2)] = (ei_selfadjoint_product::run); - func[ADJ | (UP << 2)] = (ei_selfadjoint_product::run); + func[NOTR | (UP << 2)] = (ei_selfadjoint_product::run); + func[TR | (UP << 2)] = (ei_selfadjoint_product::run); + func[ADJ | (UP << 2)] = (ei_selfadjoint_product::run); - func[NOTR | (LO << 2)] = (ei_selfadjoint_product::run); - func[TR | (LO << 2)] = (ei_selfadjoint_product::run); - func[ADJ | (LO << 2)] = (ei_selfadjoint_product::run); + func[NOTR | (LO << 2)] = (ei_selfadjoint_product::run); + func[TR | (LO << 2)] = (ei_selfadjoint_product::run); + func[ADJ | (LO << 2)] = (ei_selfadjoint_product::run); init = true; } @@ -248,14 +281,20 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp Scalar beta = *reinterpret_cast(pbeta); int code = OP(*op) | (UPLO(*uplo) << 2); - if(code>=8 || func[code]==0) + if(code>=8 || func[code]==0 || *n<0 || *k<0) + { + int info=1; + xerbla_("SYRK",&info,4); return 0; + } if(beta!=Scalar(1)) - matrix(c, *n, *n, *ldc) *= beta; + if(UPLO(*uplo)==UP) matrix(c, *n, *n, *ldc).triangularView() *= beta; + else matrix(c, *n, *n, *ldc).triangularView() *= beta; func[code](*n, *k, a, *lda, c, *ldc, alpha); - return 1; + + return 0; } // c = alpha*a*b' + alpha*b*a' + beta*c for op = 'N'or'n' @@ -269,6 +308,7 @@ int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal Scalar beta = *reinterpret_cast(pbeta); // TODO + std::cerr << "Eigen BLAS: _syr2k is not implemented yet\n"; return 0; } @@ -286,27 +326,30 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa Scalar alpha = *reinterpret_cast(palpha); Scalar beta = *reinterpret_cast(pbeta); +// std::cerr << "in hemm " << *side << " " << *uplo << " " << *m << " " << *n << " " << alpha << " " << *lda << " " << beta << " " << *ldc << "\n"; + + if(*m<0 || *n<0) + { + return 0; + } + if(beta!=Scalar(1)) matrix(c, *m, *n, *ldc) *= beta; if(SIDE(*side)==LEFT) - if(UPLO(*uplo)==UP) - ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); - else if(UPLO(*uplo)==LO) - ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); - else - return 0; + if(UPLO(*uplo)==UP) ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + else if(UPLO(*uplo)==LO) ei_product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + else return 0; else if(SIDE(*side)==RIGHT) - if(UPLO(*uplo)==UP) - ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); - else if(UPLO(*uplo)==LO) - ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); - else - return 0; + if(UPLO(*uplo)==UP) ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + else if(UPLO(*uplo)==LO) ei_product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + else return 0; else + { return 0; + } - return 1; + return 0; } // c = alpha*a*conj(a') + beta*c for op = 'N'or'n' @@ -314,7 +357,7 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) { typedef void (*functype)(int, int, const Scalar *, int, Scalar *, int, Scalar); - functype func[8]; + static functype func[8]; static bool init = false; if(!init) @@ -322,29 +365,46 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp for(int k=0; k<8; ++k) func[k] = 0; - func[NOTR | (UP << 2)] = (ei_selfadjoint_product::run); - func[ADJ | (UP << 2)] = (ei_selfadjoint_product::run); + func[NOTR | (UP << 2)] = (ei_selfadjoint_product::run); + func[ADJ | (UP << 2)] = (ei_selfadjoint_product::run); - func[NOTR | (LO << 2)] = (ei_selfadjoint_product::run); - func[ADJ | (LO << 2)] = (ei_selfadjoint_product::run); + func[NOTR | (LO << 2)] = (ei_selfadjoint_product::run); + func[ADJ | (LO << 2)] = (ei_selfadjoint_product::run); init = true; } Scalar* a = reinterpret_cast(pa); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + RealScalar alpha = *palpha; + RealScalar beta = *pbeta; + +// std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << beta << " " << *ldc << "\n"; + + if(*n<0 || *k<0) + { + return 0; + } int code = OP(*op) | (UPLO(*uplo) << 2); if(code>=8 || func[code]==0) return 0; - if(beta!=Scalar(1)) - matrix(c, *n, *n, *ldc) *= beta; + if(beta!=RealScalar(1)) + { + if(UPLO(*uplo)==UP) matrix(c, *n, *n, *ldc).triangularView() *= beta; + else matrix(c, *n, *n, *ldc).triangularView() *= beta; - func[code](*n, *k, a, *lda, c, *ldc, alpha); - return 1; + matrix(c, *n, *n, *ldc).diagonal().real() *= beta; + matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); + } + + if(*k>0 && alpha!=RealScalar(0)) + { + func[code](*n, *k, a, *lda, c, *ldc, alpha); + matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); + } + return 0; } // c = alpha*a*conj(b') + conj(alpha)*b*conj(a') + beta*c, for op = 'N'or'n' @@ -357,7 +417,13 @@ int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *pal Scalar alpha = *reinterpret_cast(palpha); Scalar beta = *reinterpret_cast(pbeta); + if(*n<0 || *k<0) + { + return 0; + } + // TODO + std::cerr << "Eigen BLAS: _her2k is not implemented yet\n"; return 0; } diff --git a/blas/xerbla.cpp b/blas/xerbla.cpp new file mode 100644 index 000000000..2e7ad6eff --- /dev/null +++ b/blas/xerbla.cpp @@ -0,0 +1,17 @@ + +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + +int xerbla_(char * msg, int *info, int) +{ + std::cerr << "Eigen BLAS ERROR #" << *info << ": " << msg << "\n"; + return 0; +} + +#ifdef __cplusplus +} +#endif diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 7d90882a2..b08f8c340 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -185,11 +185,17 @@ macro(ei_testing_print_summary) endif() if(EIGEN_TEST_ALTIVEC) - message("Altivec: Using architecture defaults") + message("Altivec: ON") else() message("Altivec: Using architecture defaults") endif() + if(EIGEN_TEST_NEON) + message("ARM NEON: ON") + else() + message("ARM NEON: Using architecture defaults") + endif() + if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) message("Explicit vec: OFF") else() diff --git a/doc/snippets/Map_general_stride.cpp b/doc/snippets/Map_general_stride.cpp new file mode 100644 index 000000000..0657e7f84 --- /dev/null +++ b/doc/snippets/Map_general_stride.cpp @@ -0,0 +1,5 @@ +int array[24]; +for(int i = 0; i < 24; ++i) array[i] = i; +cout << Map > + (array, 3, 3, Stride(8, 2)) + << endl; diff --git a/doc/snippets/Map_inner_stride.cpp b/doc/snippets/Map_inner_stride.cpp new file mode 100644 index 000000000..d95ae9b3e --- /dev/null +++ b/doc/snippets/Map_inner_stride.cpp @@ -0,0 +1,5 @@ +int array[12]; +for(int i = 0; i < 12; ++i) array[i] = i; +cout << Map > + (array, 6) // the inner stride has already been passed as template parameter + << endl; diff --git a/doc/snippets/Map_outer_stride.cpp b/doc/snippets/Map_outer_stride.cpp new file mode 100644 index 000000000..4bedaa508 --- /dev/null +++ b/doc/snippets/Map_outer_stride.cpp @@ -0,0 +1,5 @@ +int array[12]; +for(int i = 0; i < 12; ++i) array[i] = i; +cout << Map > + (array, 3, 3, OuterStride(4)) + << endl; diff --git a/doc/snippets/Map_simple.cpp b/doc/snippets/Map_simple.cpp new file mode 100644 index 000000000..423bb52ad --- /dev/null +++ b/doc/snippets/Map_simple.cpp @@ -0,0 +1,3 @@ +int array[9]; +for(int i = 0; i < 9; ++i) array[i] = i; +cout << Map(array) << endl; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b0da2a1d8..072f63e1d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -104,16 +104,18 @@ ei_add_test(cwiseop) ei_add_test(unalignedcount) ei_add_test(redux) ei_add_test(visitor) +ei_add_test(block) ei_add_test(product_small) ei_add_test(product_large) ei_add_test(product_extra) ei_add_test(diagonalmatrices) ei_add_test(adjoint) -ei_add_test(submatrices) +ei_add_test(diagonal) ei_add_test(miscmatrices) ei_add_test(commainitializer) ei_add_test(smallvectors) ei_add_test(map) +ei_add_test(mapstride) ei_add_test(array) ei_add_test(array_for_matrix) ei_add_test(array_replicate) diff --git a/test/submatrices.cpp b/test/block.cpp similarity index 58% rename from test/submatrices.cpp rename to test/block.cpp index d53fd4b6f..c180afb75 100644 --- a/test/submatrices.cpp +++ b/test/block.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2006-2010 Benoit Jacob // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -51,16 +51,18 @@ template struct CheckMinor CheckMinor(MatrixType&, int, int) {} }; -template void submatrices(const MatrixType& m) +template void block(const MatrixType& m) { /* this test covers the following files: - Row.h Column.h Block.h Minor.h DiagonalCoeffs.h + Row.h Column.h Block.h Minor.h */ typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef Matrix VectorType; typedef Matrix RowVectorType; - typedef Matrix SquareMatrixType; + typedef Matrix DynamicMatrixType; + typedef Matrix DynamicVectorType; + int rows = m.rows(); int cols = m.cols(); @@ -69,8 +71,6 @@ template void submatrices(const MatrixType& m) m3(rows, cols), mzero = MatrixType::Zero(rows, cols), ones = MatrixType::Ones(rows, cols); - SquareMatrixType identity = SquareMatrixType::Identity(rows, rows), - square = SquareMatrixType::Random(rows, rows); VectorType v1 = VectorType::Random(rows), v2 = VectorType::Random(rows), v3 = VectorType::Random(rows), @@ -84,20 +84,19 @@ template void submatrices(const MatrixType& m) int c2 = ei_random(c1,cols-1); //check row() and col() - VERIFY_IS_APPROX(m1.col(c1).transpose(), m1.transpose().row(c1)); - // FIXME perhaps we should re-enable that without the .eval() - VERIFY_IS_APPROX(m1.col(c1).dot(square.row(r1)), (square * m1.conjugate()).eval()(r1,c1)); + VERIFY_IS_EQUAL(m1.col(c1).transpose(), m1.transpose().row(c1)); //check operator(), both constant and non-constant, on row() and col() m1.row(r1) += s1 * m1.row(r2); m1.col(c1) += s1 * m1.col(c2); //check block() Matrix b1(1,1); b1(0,0) = m1(r1,c1); + RowVectorType br1(m1.block(r1,0,1,cols)); VectorType bc1(m1.block(0,c1,rows,1)); - VERIFY_IS_APPROX(b1, m1.block(r1,c1,1,1)); - VERIFY_IS_APPROX(m1.row(r1), br1); - VERIFY_IS_APPROX(m1.col(c1), bc1); + VERIFY_IS_EQUAL(b1, m1.block(r1,c1,1,1)); + VERIFY_IS_EQUAL(m1.row(r1), br1); + VERIFY_IS_EQUAL(m1.col(c1), bc1); //check operator(), both constant and non-constant, on block() m1.block(r1,c1,r2-r1+1,c2-c1+1) = s1 * m2.block(0, 0, r2-r1+1,c2-c1+1); m1.block(r1,c1,r2-r1+1,c2-c1+1)(r2-r1,c2-c1) = m2.block(0, 0, r2-r1+1,c2-c1+1)(0,0); @@ -105,11 +104,6 @@ template void submatrices(const MatrixType& m) //check minor() CheckMinor checkminor(m1,r1,c1); - //check diagonal() - VERIFY_IS_APPROX(m1.diagonal(), m1.transpose().diagonal()); - m2.diagonal() = 2 * m1.diagonal(); - m2.diagonal()[0] *= 3; - const int BlockRows = EIGEN_ENUM_MIN(MatrixType::RowsAtCompileTime,2); const int BlockCols = EIGEN_ENUM_MIN(MatrixType::ColsAtCompileTime,5); if (rows>=5 && cols>=8) @@ -120,45 +114,23 @@ template void submatrices(const MatrixType& m) m1.template block(1,1)(0, 3) = m1.template block<2,5>(1,1)(1,2); // check that fixed block() and block() agree Matrix b = m1.template block(3,3); - VERIFY_IS_APPROX(b, m1.block(3,3,BlockRows,BlockCols)); + VERIFY_IS_EQUAL(b, m1.block(3,3,BlockRows,BlockCols)); } if (rows>2) { // test sub vectors - VERIFY_IS_APPROX(v1.template head<2>(), v1.block(0,0,2,1)); - VERIFY_IS_APPROX(v1.template head<2>(), v1.head(2)); - VERIFY_IS_APPROX(v1.template head<2>(), v1.segment(0,2)); - VERIFY_IS_APPROX(v1.template head<2>(), v1.template segment<2>(0)); + VERIFY_IS_EQUAL(v1.template head<2>(), v1.block(0,0,2,1)); + VERIFY_IS_EQUAL(v1.template head<2>(), v1.head(2)); + VERIFY_IS_EQUAL(v1.template head<2>(), v1.segment(0,2)); + VERIFY_IS_EQUAL(v1.template head<2>(), v1.template segment<2>(0)); int i = rows-2; - VERIFY_IS_APPROX(v1.template tail<2>(), v1.block(i,0,2,1)); - VERIFY_IS_APPROX(v1.template tail<2>(), v1.tail(2)); - VERIFY_IS_APPROX(v1.template tail<2>(), v1.segment(i,2)); - VERIFY_IS_APPROX(v1.template tail<2>(), v1.template segment<2>(i)); + VERIFY_IS_EQUAL(v1.template tail<2>(), v1.block(i,0,2,1)); + VERIFY_IS_EQUAL(v1.template tail<2>(), v1.tail(2)); + VERIFY_IS_EQUAL(v1.template tail<2>(), v1.segment(i,2)); + VERIFY_IS_EQUAL(v1.template tail<2>(), v1.template segment<2>(i)); i = ei_random(0,rows-2); - VERIFY_IS_APPROX(v1.segment(i,2), v1.template segment<2>(i)); - - enum { - N1 = MatrixType::RowsAtCompileTime>1 ? 1 : 0, - N2 = MatrixType::RowsAtCompileTime>2 ? -2 : 0 - }; - - // check sub/super diagonal - m2.template diagonal() = 2 * m1.template diagonal(); - m2.template diagonal()[0] *= 3; - VERIFY_IS_APPROX(m2.template diagonal()[0], static_cast(6) * m1.template diagonal()[0]); - - m2.template diagonal() = 2 * m1.template diagonal(); - m2.template diagonal()[0] *= 3; - VERIFY_IS_APPROX(m2.template diagonal()[0], static_cast(6) * m1.template diagonal()[0]); - - m2.diagonal(N1) = 2 * m1.diagonal(N1); - m2.diagonal(N1)[0] *= 3; - VERIFY_IS_APPROX(m2.diagonal(N1)[0], static_cast(6) * m1.diagonal(N1)[0]); - - m2.diagonal(N2) = 2 * m1.diagonal(N2); - m2.diagonal(N2)[0] *= 3; - VERIFY_IS_APPROX(m2.diagonal(N2)[0], static_cast(6) * m1.diagonal(N2)[0]); + VERIFY_IS_EQUAL(v1.segment(i,2), v1.template segment<2>(i)); } // stress some basic stuffs with block matrices @@ -167,6 +139,49 @@ template void submatrices(const MatrixType& m) VERIFY(ei_real(ones.col(c1).dot(ones.col(c2))) == RealScalar(rows)); VERIFY(ei_real(ones.row(r1).dot(ones.row(r2))) == RealScalar(cols)); + + // now test some block-inside-of-block. + + // expressions with direct access + VERIFY_IS_EQUAL( (m1.block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2)) , (m1.block(r2,c2,rows-r2,cols-c2)) ); + VERIFY_IS_EQUAL( (m1.block(r1,c1,r2-r1+1,c2-c1+1).row(0)) , (m1.row(r1).segment(c1,c2-c1+1)) ); + VERIFY_IS_EQUAL( (m1.block(r1,c1,r2-r1+1,c2-c1+1).col(0)) , (m1.col(c1).segment(r1,r2-r1+1)) ); + VERIFY_IS_EQUAL( (m1.block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , (m1.row(r1).segment(c1,c2-c1+1)) ); + VERIFY_IS_EQUAL( (m1.transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , (m1.row(r1).segment(c1,c2-c1+1)) ); + + // expressions without direct access + VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2)) , ((m1+m2).block(r2,c2,rows-r2,cols-c2)) ); + VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).row(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) ); + VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).col(0)) , ((m1+m2).col(c1).segment(r1,r2-r1+1)) ); + VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) ); + VERIFY_IS_EQUAL( ((m1+m2).transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) ); + + // evaluation into plain matrices from expressions with direct access (stress MapBase) + DynamicMatrixType dm; + DynamicVectorType dv; + dm.setZero(); + dm = m1.block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2); + VERIFY_IS_EQUAL(dm, (m1.block(r2,c2,rows-r2,cols-c2))); + dm.setZero(); + dv.setZero(); + dm = m1.block(r1,c1,r2-r1+1,c2-c1+1).row(0).transpose(); + dv = m1.row(r1).segment(c1,c2-c1+1); + VERIFY_IS_EQUAL(dv, dm); + dm.setZero(); + dv.setZero(); + dm = m1.col(c1).segment(r1,r2-r1+1); + dv = m1.block(r1,c1,r2-r1+1,c2-c1+1).col(0); + VERIFY_IS_EQUAL(dv, dm); + dm.setZero(); + dv.setZero(); + dm = m1.block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0); + dv = m1.row(r1).segment(c1,c2-c1+1); + VERIFY_IS_EQUAL(dv, dm); + dm.setZero(); + dv.setZero(); + dm = m1.row(r1).segment(c1,c2-c1+1).transpose(); + dv = m1.transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0); + VERIFY_IS_EQUAL(dv, dm); } @@ -176,18 +191,30 @@ void compare_using_data_and_stride(const MatrixType& m) int rows = m.rows(); int cols = m.cols(); int size = m.size(); - int stride = m.stride(); + int innerStride = m.innerStride(); + int outerStride = m.outerStride(); + int rowStride = m.rowStride(); + int colStride = m.colStride(); const typename MatrixType::Scalar* data = m.data(); for(int j=0;j()) ); - CALL_SUBTEST_2( submatrices(Matrix4d()) ); - CALL_SUBTEST_3( submatrices(MatrixXcf(3, 3)) ); - CALL_SUBTEST_4( submatrices(MatrixXi(8, 12)) ); - CALL_SUBTEST_5( submatrices(MatrixXcd(20, 20)) ); - CALL_SUBTEST_6( submatrices(MatrixXf(20, 20)) ); + CALL_SUBTEST_1( block(Matrix()) ); + CALL_SUBTEST_2( block(Matrix4d()) ); + CALL_SUBTEST_3( block(MatrixXcf(3, 3)) ); + CALL_SUBTEST_4( block(MatrixXi(8, 12)) ); + CALL_SUBTEST_5( block(MatrixXcd(20, 20)) ); + CALL_SUBTEST_6( block(MatrixXf(20, 20)) ); - CALL_SUBTEST_8( submatrices(Matrix(3, 4)) ); + CALL_SUBTEST_8( block(Matrix(3, 4)) ); +#ifndef EIGEN_DEFAULT_TO_ROW_MAJOR CALL_SUBTEST_6( data_and_stride(MatrixXf(ei_random(5,50), ei_random(5,50))) ); CALL_SUBTEST_7( data_and_stride(Matrix(ei_random(5,50), ei_random(5,50))) ); +#endif } } diff --git a/test/cholesky.cpp b/test/cholesky.cpp index 1bb808d20..a446f5d73 100644 --- a/test/cholesky.cpp +++ b/test/cholesky.cpp @@ -95,7 +95,7 @@ template void cholesky(const MatrixType& m) { LLT chollo(symmLo); - VERIFY_IS_APPROX(symm, chollo.matrixL().toDenseMatrix() * chollo.matrixL().adjoint().toDenseMatrix()); + VERIFY_IS_APPROX(symm, chollo.reconstructedMatrix()); vecX = chollo.solve(vecB); VERIFY_IS_APPROX(symm * vecX, vecB); matX = chollo.solve(matB); @@ -103,7 +103,7 @@ template void cholesky(const MatrixType& m) // test the upper mode LLT cholup(symmUp); - VERIFY_IS_APPROX(symm, cholup.matrixL().toDenseMatrix() * cholup.matrixL().adjoint().toDenseMatrix()); + VERIFY_IS_APPROX(symm, cholup.reconstructedMatrix()); vecX = cholup.solve(vecB); VERIFY_IS_APPROX(symm * vecX, vecB); matX = cholup.solve(matB); @@ -119,8 +119,7 @@ template void cholesky(const MatrixType& m) { LDLT ldlt(symm); - // TODO(keir): This doesn't make sense now that LDLT pivots. - //VERIFY_IS_APPROX(symm, ldlt.matrixL() * ldlt.vectorD().asDiagonal() * ldlt.matrixL().adjoint()); + VERIFY_IS_APPROX(symm, ldlt.reconstructedMatrix()); vecX = ldlt.solve(vecB); VERIFY_IS_APPROX(symm * vecX, vecB); matX = ldlt.solve(matB); diff --git a/test/diagonal.cpp b/test/diagonal.cpp new file mode 100644 index 000000000..288d58c6e --- /dev/null +++ b/test/diagonal.cpp @@ -0,0 +1,81 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" + +template void diagonal(const MatrixType& m) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix VectorType; + typedef Matrix RowVectorType; + int rows = m.rows(); + int cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols); + + //check diagonal() + VERIFY_IS_APPROX(m1.diagonal(), m1.transpose().diagonal()); + m2.diagonal() = 2 * m1.diagonal(); + m2.diagonal()[0] *= 3; + + if (rows>2) + { + enum { + N1 = MatrixType::RowsAtCompileTime>1 ? 1 : 0, + N2 = MatrixType::RowsAtCompileTime>2 ? -2 : 0 + }; + + // check sub/super diagonal + m2.template diagonal() = 2 * m1.template diagonal(); + m2.template diagonal()[0] *= 3; + VERIFY_IS_APPROX(m2.template diagonal()[0], static_cast(6) * m1.template diagonal()[0]); + + m2.template diagonal() = 2 * m1.template diagonal(); + m2.template diagonal()[0] *= 3; + VERIFY_IS_APPROX(m2.template diagonal()[0], static_cast(6) * m1.template diagonal()[0]); + + m2.diagonal(N1) = 2 * m1.diagonal(N1); + m2.diagonal(N1)[0] *= 3; + VERIFY_IS_APPROX(m2.diagonal(N1)[0], static_cast(6) * m1.diagonal(N1)[0]); + + m2.diagonal(N2) = 2 * m1.diagonal(N2); + m2.diagonal(N2)[0] *= 3; + VERIFY_IS_APPROX(m2.diagonal(N2)[0], static_cast(6) * m1.diagonal(N2)[0]); + } +} + +void test_diagonal() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( diagonal(Matrix()) ); + CALL_SUBTEST_2( diagonal(Matrix4d()) ); + CALL_SUBTEST_2( diagonal(MatrixXcf(3, 3)) ); + CALL_SUBTEST_2( diagonal(MatrixXi(8, 12)) ); + CALL_SUBTEST_2( diagonal(MatrixXcd(20, 20)) ); + CALL_SUBTEST_1( diagonal(MatrixXf(21, 19)) ); + CALL_SUBTEST_1( diagonal(Matrix(3, 4)) ); + } +} diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp index 895fe0f08..b1a50f6b2 100644 --- a/test/geo_transformations.cpp +++ b/test/geo_transformations.cpp @@ -59,6 +59,7 @@ template void transformations(void) Matrix3 matrot1, m; Scalar a = ei_random(-Scalar(M_PI), Scalar(M_PI)); + Scalar s0 = ei_random(); VERIFY_IS_APPROX(v0, AngleAxisx(a, v0.normalized()) * v0); VERIFY_IS_APPROX(-v0, AngleAxisx(Scalar(M_PI), v0.unitOrthogonal()) * v0); @@ -234,6 +235,16 @@ template void transformations(void) t1 = Matrix3(q1) * (AlignedScaling3(v0) * Translation3(v0)); VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + t0.setIdentity(); + t0.scale(s0).translate(v0); + t1 = Scaling(s0) * Translation3(v0); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + t0.prescale(s0); + t1 = Scaling(s0) * t1; + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + t0.setIdentity(); t0.prerotate(q1).prescale(v0).pretranslate(v0); // translation * aligned scaling and transformation * mat diff --git a/test/inverse.cpp b/test/inverse.cpp index 713caf4a6..1e567ad14 100644 --- a/test/inverse.cpp +++ b/test/inverse.cpp @@ -42,7 +42,7 @@ template void inverse(const MatrixType& m) m2(rows, cols), mzero = MatrixType::Zero(rows, cols), identity = MatrixType::Identity(rows, rows); - createRandomMatrixOfRank(rows,rows,rows,m1); + createRandomPIMatrixOfRank(rows,rows,rows,m1); m2 = m1.inverse(); VERIFY_IS_APPROX(m1, m2.inverse() ); diff --git a/test/lu.cpp b/test/lu.cpp index 568db8230..37e2990d2 100644 --- a/test/lu.cpp +++ b/test/lu.cpp @@ -29,6 +29,7 @@ using namespace std; template void lu_non_invertible() { typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; /* this test covers the following files: LU.h */ @@ -51,39 +52,46 @@ template void lu_non_invertible() cols2 = cols = MatrixType::ColsAtCompileTime; } + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; typedef typename ei_kernel_retval_base >::ReturnType KernelMatrixType; typedef typename ei_image_retval_base >::ReturnType ImageMatrixType; - typedef Matrix DynamicMatrixType; - typedef Matrix + typedef Matrix CMatrixType; + typedef Matrix + RMatrixType; int rank = ei_random(1, std::min(rows, cols)-1); // The image of the zero matrix should consist of a single (zero) column vector VERIFY((MatrixType::Zero(rows,cols).fullPivLu().image(MatrixType::Zero(rows,cols)).cols() == 1)); - + MatrixType m1(rows, cols), m3(rows, cols2); CMatrixType m2(cols, cols2); - createRandomMatrixOfRank(rank, rows, cols, m1); + createRandomPIMatrixOfRank(rank, rows, cols, m1); + + FullPivLU lu; + + // The special value 0.01 below works well in tests. Keep in mind that we're only computing the rank + // of singular values are either 0 or 1. + // So it's not clear at all that the epsilon should play any role there. + lu.setThreshold(RealScalar(0.01)); + lu.compute(m1); + + MatrixType u(rows,cols); + u = lu.matrixLU().template triangularView(); + RMatrixType l = RMatrixType::Identity(rows,rows); + l.block(0,0,rows,std::min(rows,cols)).template triangularView() + = lu.matrixLU().block(0,0,rows,std::min(rows,cols)); - FullPivLU lu(m1); - // FIXME need better way to construct trapezoid matrices. extend triangularView to support rectangular. - DynamicMatrixType u(rows,cols); - for(int i = 0; i < rows; i++) - for(int j = 0; j < cols; j++) - u(i,j) = i>j ? Scalar(0) : lu.matrixLU()(i,j); - DynamicMatrixType l(rows,rows); - for(int i = 0; i < rows; i++) - for(int j = 0; j < rows; j++) - l(i,j) = (i=cols) ? Scalar(0) - : i==j ? Scalar(1) - : lu.matrixLU()(i,j); - VERIFY_IS_APPROX(lu.permutationP() * m1 * lu.permutationQ(), l*u); - + KernelMatrixType m1kernel = lu.kernel(); ImageMatrixType m1image = lu.image(m1); + VERIFY_IS_APPROX(m1, lu.reconstructedMatrix()); VERIFY(rank == lu.rank()); VERIFY(cols - lu.rank() == lu.dimensionOfKernel()); VERIFY(!lu.isInjective()); @@ -91,9 +99,8 @@ template void lu_non_invertible() VERIFY(!lu.isSurjective()); VERIFY((m1 * m1kernel).isMuchSmallerThan(m1)); VERIFY(m1image.fullPivLu().rank() == rank); - DynamicMatrixType sidebyside(m1.rows(), m1.cols() + m1image.cols()); - sidebyside << m1, m1image; - VERIFY(sidebyside.fullPivLu().rank() == rank); + VERIFY_IS_APPROX(m1 * m1.adjoint() * m1image, m1image); + m2 = CMatrixType::Random(cols,cols2); m3 = m1*m2; m2 = CMatrixType::Random(cols,cols2); @@ -107,20 +114,19 @@ template void lu_invertible() /* this test covers the following files: LU.h */ + typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; int size = ei_random(1,200); MatrixType m1(size, size), m2(size, size), m3(size, size); - m1 = MatrixType::Random(size,size); + FullPivLU lu; + lu.setThreshold(0.01); + do { + m1 = MatrixType::Random(size,size); + lu.compute(m1); + } while(!lu.isInvertible()); - if (ei_is_same_type::ret) - { - // let's build a matrix more stable to inverse - MatrixType a = MatrixType::Random(size,size*2); - m1 += a * a.adjoint(); - } - - FullPivLU lu(m1); + VERIFY_IS_APPROX(m1, lu.reconstructedMatrix()); VERIFY(0 == lu.dimensionOfKernel()); VERIFY(lu.kernel().cols() == 1); // the kernel() should consist of a single (zero) column vector VERIFY(size == lu.rank()); @@ -134,6 +140,23 @@ template void lu_invertible() VERIFY_IS_APPROX(m2, lu.inverse()*m3); } +template void lu_partial_piv() +{ + /* this test covers the following files: + PartialPivLU.h + */ + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + int rows = ei_random(1,4); + int cols = rows; + + MatrixType m1(cols, rows); + m1.setRandom(); + PartialPivLU plu(m1); + + VERIFY_IS_APPROX(m1, plu.reconstructedMatrix()); +} + template void lu_verify_assert() { MatrixType tmp; @@ -169,21 +192,23 @@ void test_lu() CALL_SUBTEST_2( (lu_non_invertible >()) ); CALL_SUBTEST_2( (lu_verify_assert >()) ); - + CALL_SUBTEST_3( lu_non_invertible() ); CALL_SUBTEST_3( lu_invertible() ); CALL_SUBTEST_3( lu_verify_assert() ); - + CALL_SUBTEST_4( lu_non_invertible() ); CALL_SUBTEST_4( lu_invertible() ); + CALL_SUBTEST_4( lu_partial_piv() ); CALL_SUBTEST_4( lu_verify_assert() ); - + CALL_SUBTEST_5( lu_non_invertible() ); CALL_SUBTEST_5( lu_invertible() ); CALL_SUBTEST_5( lu_verify_assert() ); - + CALL_SUBTEST_6( lu_non_invertible() ); CALL_SUBTEST_6( lu_invertible() ); + CALL_SUBTEST_6( lu_partial_piv() ); CALL_SUBTEST_6( lu_verify_assert() ); CALL_SUBTEST_7(( lu_non_invertible >() )); diff --git a/test/main.h b/test/main.h index 64f70b394..d4f96e3d2 100644 --- a/test/main.h +++ b/test/main.h @@ -148,7 +148,7 @@ namespace Eigen #define EIGEN_INTERNAL_DEBUGGING #define EIGEN_NICE_RANDOM -#include // required for createRandomMatrixOfRank +#include // required for createRandomPIMatrixOfRank #define VERIFY(a) do { if (!(a)) { \ @@ -157,6 +157,7 @@ namespace Eigen exit(2); \ } } while (0) +#define VERIFY_IS_EQUAL(a, b) VERIFY(test_is_equal(a, b)) #define VERIFY_IS_APPROX(a, b) VERIFY(test_ei_isApprox(a, b)) #define VERIFY_IS_NOT_APPROX(a, b) VERIFY(!test_ei_isApprox(a, b)) #define VERIFY_IS_MUCH_SMALLER_THAN(a, b) VERIFY(test_ei_isMuchSmallerThan(a, b)) @@ -342,8 +343,59 @@ inline bool test_isUnitary(const MatrixBase& m) return m.isUnitary(test_precision::Scalar>()); } +template +struct test_is_equal_impl +{ + static bool run(const Derived1& a1, const Derived2& a2) + { + if(a1.size() != a2.size()) return false; + // we evaluate a2 into a temporary of the shape of a1. this allows to let Assign.h handle the transposing if needed. + typename Derived1::PlainObject a2_evaluated(a2); + for(int i = 0; i < a1.size(); ++i) + if(a1.coeff(i) != a2_evaluated.coeff(i)) return false; + return true; + } +}; + +template +struct test_is_equal_impl +{ + static bool run(const Derived1& a1, const Derived2& a2) + { + if(a1.rows() != a2.rows()) return false; + if(a1.cols() != a2.cols()) return false; + for(int j = 0; j < a1.cols(); ++j) + for(int i = 0; i < a1.rows(); ++i) + if(a1.coeff(i,j) != a2.coeff(i,j)) return false; + return true; + } +}; + +template +bool test_is_equal(const Derived1& a1, const Derived2& a2) +{ + return test_is_equal_impl::run(a1, a2); +} + +bool test_is_equal(const int actual, const int expected) +{ + if (actual==expected) + return true; + // false: + std::cerr + << std::endl << " actual = " << actual + << std::endl << " expected = " << expected << std::endl << std::endl; + return false; +} + +/** Creates a random Partial Isometry matrix of given rank. + * + * A partial isometry is a matrix all of whose singular values are either 0 or 1. + * This is very useful to test rank-revealing algorithms. + */ template -void createRandomMatrixOfRank(int desired_rank, int rows, int cols, MatrixType& m) +void createRandomPIMatrixOfRank(int desired_rank, int rows, int cols, MatrixType& m) { typedef typename ei_traits::Scalar Scalar; enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime }; @@ -360,7 +412,8 @@ void createRandomMatrixOfRank(int desired_rank, int rows, int cols, MatrixType& if(desired_rank == 1) { - m = VectorType::Random(rows) * VectorType::Random(cols).transpose(); + // here we normalize the vectors to get a partial isometry + m = VectorType::Random(rows).normalized() * VectorType::Random(cols).normalized().transpose(); return; } diff --git a/test/map.cpp b/test/map.cpp index 603b6159b..acaa8fecc 100644 --- a/test/map.cpp +++ b/test/map.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2006-2010 Benoit Jacob // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -42,8 +42,8 @@ template void map_class_vector(const VectorType& m) VectorType ma1 = Map(array1, size); VectorType ma2 = Map(array2, size); VectorType ma3 = Map(array3unaligned, size); - VERIFY_IS_APPROX(ma1, ma2); - VERIFY_IS_APPROX(ma1, ma3); + VERIFY_IS_EQUAL(ma1, ma2); + VERIFY_IS_EQUAL(ma1, ma3); VERIFY_RAISES_ASSERT((Map(array3unaligned, size))); ei_aligned_delete(array1, size); @@ -70,9 +70,9 @@ template void map_class_matrix(const MatrixType& m) Map(array3unaligned, rows, cols) = Map(array1, rows, cols); MatrixType ma1 = Map(array1, rows, cols); MatrixType ma2 = Map(array2, rows, cols); - VERIFY_IS_APPROX(ma1, ma2); + VERIFY_IS_EQUAL(ma1, ma2); MatrixType ma3 = Map(array3unaligned, rows, cols); - VERIFY_IS_APPROX(ma1, ma3); + VERIFY_IS_EQUAL(ma1, ma3); ei_aligned_delete(array1, size); ei_aligned_delete(array2, size); @@ -97,8 +97,8 @@ template void map_static_methods(const VectorType& m) VectorType ma1 = VectorType::Map(array1, size); VectorType ma2 = VectorType::MapAligned(array2, size); VectorType ma3 = VectorType::Map(array3unaligned, size); - VERIFY_IS_APPROX(ma1, ma2); - VERIFY_IS_APPROX(ma1, ma3); + VERIFY_IS_EQUAL(ma1, ma2); + VERIFY_IS_EQUAL(ma1, ma3); ei_aligned_delete(array1, size); ei_aligned_delete(array2, size); diff --git a/test/mapstride.cpp b/test/mapstride.cpp new file mode 100644 index 000000000..7a1605681 --- /dev/null +++ b/test/mapstride.cpp @@ -0,0 +1,139 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Benoit Jacob +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" + +template void map_class_vector(const VectorType& m) +{ + typedef typename VectorType::Scalar Scalar; + + int size = m.size(); + + VectorType v = VectorType::Random(size); + + int arraysize = 3*size; + + Scalar* array = ei_aligned_new(arraysize); + + { + Map > map(array, size); + map = v; + for(int i = 0; i < size; ++i) + { + VERIFY(array[3*i] == v[i]); + VERIFY(map[i] == v[i]); + } + } + + { + Map > map(array, size, InnerStride(2)); + map = v; + for(int i = 0; i < size; ++i) + { + VERIFY(array[2*i] == v[i]); + VERIFY(map[i] == v[i]); + } + } + + ei_aligned_delete(array, arraysize); +} + +template void map_class_matrix(const MatrixType& _m) +{ + typedef typename MatrixType::Scalar Scalar; + + int rows = _m.rows(), cols = _m.cols(); + + MatrixType m = MatrixType::Random(rows,cols); + + int arraysize = 2*(rows+4)*(cols+4); + + Scalar* array = ei_aligned_new(arraysize); + + // test no inner stride and some dynamic outer stride + { + Map > map(array, rows, cols, OuterStride(m.innerSize()+1)); + map = m; + VERIFY(map.outerStride() == map.innerSize()+1); + for(int i = 0; i < m.outerSize(); ++i) + for(int j = 0; j < m.innerSize(); ++j) + { + VERIFY(array[map.outerStride()*i+j] == m.coeffByOuterInner(i,j)); + VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + } + } + + // test no inner stride and an outer stride of +4. This is quite important as for fixed-size matrices, + // this allows to hit the special case where it's vectorizable. + { + enum { + InnerSize = MatrixType::InnerSizeAtCompileTime, + OuterStrideAtCompileTime = InnerSize==Dynamic ? Dynamic : InnerSize+4 + }; + Map > + map(array, rows, cols, OuterStride(m.innerSize()+4)); + map = m; + VERIFY(map.outerStride() == map.innerSize()+4); + for(int i = 0; i < m.outerSize(); ++i) + for(int j = 0; j < m.innerSize(); ++j) + { + VERIFY(array[map.outerStride()*i+j] == m.coeffByOuterInner(i,j)); + VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + } + } + + // test both inner stride and outer stride + { + Map > map(array, rows, cols, Stride(2*m.innerSize()+1, 2)); + map = m; + VERIFY(map.outerStride() == 2*map.innerSize()+1); + VERIFY(map.innerStride() == 2); + for(int i = 0; i < m.outerSize(); ++i) + for(int j = 0; j < m.innerSize(); ++j) + { + VERIFY(array[map.outerStride()*i+map.innerStride()*j] == m.coeffByOuterInner(i,j)); + VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + } + } + + ei_aligned_delete(array, arraysize); +} + +void test_mapstride() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( map_class_vector(Matrix()) ); + CALL_SUBTEST_2( map_class_vector(Vector4d()) ); + CALL_SUBTEST_3( map_class_vector(RowVector4f()) ); + CALL_SUBTEST_4( map_class_vector(VectorXcf(8)) ); + CALL_SUBTEST_5( map_class_vector(VectorXi(12)) ); + + CALL_SUBTEST_1( map_class_matrix(Matrix()) ); + CALL_SUBTEST_2( map_class_matrix(Matrix4d()) ); + CALL_SUBTEST_3( map_class_matrix(Matrix()) ); + CALL_SUBTEST_3( map_class_matrix(Matrix()) ); + CALL_SUBTEST_4( map_class_matrix(MatrixXcf(ei_random(1,10),ei_random(1,10))) ); + CALL_SUBTEST_5( map_class_matrix(MatrixXi(5,5)));//ei_random(1,10),ei_random(1,10))) ); + } +} diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index 71c2dcb18..8b8e8302e 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -78,7 +78,9 @@ template void mixingtypes(int size = SizeAtCompileType) // check dot product vf.dot(vf); +#if 0 // we get other compilation errors here than just static asserts VERIFY_RAISES_ASSERT(vd.dot(vf)); +#endif VERIFY_RAISES_ASSERT(vcf.dot(vf)); // yeah eventually we should allow this but i'm too lazy to make that change now in Dot.h // especially as that might be rewritten as cwise product .sum() which would make that automatic. diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 7d863e616..e0cb61525 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -32,7 +32,10 @@ template T ei_negate(const T& x) { return -x; } template bool areApprox(const Scalar* a, const Scalar* b, int size) { for (int i=0; i void permutationmatrices(const MatrixType& m) typedef Matrix LeftPermutationVectorType; typedef PermutationMatrix RightPermutationType; typedef Matrix RightPermutationVectorType; - + int rows = m.rows(); int cols = m.cols(); @@ -72,7 +72,7 @@ template void permutationmatrices(const MatrixType& m) Matrix rm(rp); VERIFY_IS_APPROX(m_permuted, lm*m_original*rm); - + VERIFY_IS_APPROX(lp.inverse()*m_permuted*rp.inverse(), m_original); VERIFY((lp*lp.inverse()).toDenseMatrix().isIdentity()); @@ -86,6 +86,23 @@ template void permutationmatrices(const MatrixType& m) identityp.setIdentity(rows); VERIFY_IS_APPROX(m_original, identityp*m_original); + // check inplace permutations + m_permuted = m_original; + m_permuted = lp.inverse() * m_permuted; + VERIFY_IS_APPROX(m_permuted, lp.inverse()*m_original); + + m_permuted = m_original; + m_permuted = m_permuted * rp.inverse(); + VERIFY_IS_APPROX(m_permuted, m_original*rp.inverse()); + + m_permuted = m_original; + m_permuted = lp * m_permuted; + VERIFY_IS_APPROX(m_permuted, lp*m_original); + + m_permuted = m_original; + m_permuted = m_permuted * rp; + VERIFY_IS_APPROX(m_permuted, m_original*rp); + if(rows>1 && cols>1) { lp2 = lp; @@ -114,7 +131,7 @@ void test_permutationmatrices() CALL_SUBTEST_2( permutationmatrices(Matrix3f()) ); CALL_SUBTEST_3( permutationmatrices(Matrix()) ); CALL_SUBTEST_4( permutationmatrices(Matrix4d()) ); - CALL_SUBTEST_5( permutationmatrices(Matrix()) ); + CALL_SUBTEST_5( permutationmatrices(Matrix()) ); CALL_SUBTEST_6( permutationmatrices(Matrix(20, 30)) ); CALL_SUBTEST_7( permutationmatrices(MatrixXcf(15, 10)) ); } diff --git a/test/qr.cpp b/test/qr.cpp index f2393c13b..1ce1f46e6 100644 --- a/test/qr.cpp +++ b/test/qr.cpp @@ -117,6 +117,7 @@ void test_qr() CALL_SUBTEST_3(( qr_fixedsize, 2 >() )); CALL_SUBTEST_4(( qr_fixedsize, 4 >() )); CALL_SUBTEST_5(( qr_fixedsize, 7 >() )); + CALL_SUBTEST_11( qr(Matrix()) ); } for(int i = 0; i < g_repeat; i++) { diff --git a/test/qr_colpivoting.cpp b/test/qr_colpivoting.cpp index 16eb27c52..96cc66316 100644 --- a/test/qr_colpivoting.cpp +++ b/test/qr_colpivoting.cpp @@ -36,7 +36,7 @@ template void qr() typedef Matrix MatrixQType; typedef Matrix VectorType; MatrixType m1; - createRandomMatrixOfRank(rank,rows,cols,m1); + createRandomPIMatrixOfRank(rank,rows,cols,m1); ColPivHouseholderQR qr(m1); VERIFY_IS_APPROX(rank, qr.rank()); VERIFY(cols - qr.rank() == qr.dimensionOfKernel()); @@ -64,7 +64,7 @@ template void qr_fixedsize() typedef typename MatrixType::Scalar Scalar; int rank = ei_random(1, std::min(int(Rows), int(Cols))-1); Matrix m1; - createRandomMatrixOfRank(rank,Rows,Cols,m1); + createRandomPIMatrixOfRank(rank,Rows,Cols,m1); ColPivHouseholderQR > qr(m1); VERIFY_IS_APPROX(rank, qr.rank()); VERIFY(Cols - qr.rank() == qr.dimensionOfKernel()); diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp index c82ba4c7e..7ad3af1fe 100644 --- a/test/qr_fullpivoting.cpp +++ b/test/qr_fullpivoting.cpp @@ -35,7 +35,7 @@ template void qr() typedef Matrix MatrixQType; typedef Matrix VectorType; MatrixType m1; - createRandomMatrixOfRank(rank,rows,cols,m1); + createRandomPIMatrixOfRank(rank,rows,cols,m1); FullPivHouseholderQR qr(m1); VERIFY_IS_APPROX(rank, qr.rank()); VERIFY(cols - qr.rank() == qr.dimensionOfKernel()); diff --git a/test/testsuite.cmake b/test/testsuite.cmake index 90edf2853..b68a327a9 100644 --- a/test/testsuite.cmake +++ b/test/testsuite.cmake @@ -147,6 +147,9 @@ endif(NOT EIGEN_NO_UPDATE) # which ctest command to use for running the dashboard SET (CTEST_COMMAND "${EIGEN_CMAKE_DIR}ctest -D ${EIGEN_MODE}") +if($ENV{EIGEN_CTEST_ARGS}) +SET (CTEST_COMMAND "${CTEST_COMMAND} $ENV{EIGEN_CTEST_ARGS}") +endif($ENV{EIGEN_CTEST_ARGS}) # what cmake command to use for configuring this dashboard SET (CTEST_CMAKE_COMMAND "${EIGEN_CMAKE_DIR}cmake -DEIGEN_LEAVE_TEST_IN_ALL_TARGET=ON") diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 5d86df7b3..94a8a5c96 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -22,6 +22,7 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . +#define EIGEN_DEBUG_ASSIGN #include "main.h" #include @@ -33,6 +34,14 @@ bool test_assign(const Dst&, const Src&, int traversal, int unrolling) && ei_assign_traits::Unrolling==unrolling; } +template +bool test_assign(int traversal, int unrolling) +{ + ei_assign_traits::debug(); + return ei_assign_traits::Traversal==traversal + && ei_assign_traits::Unrolling==unrolling; +} + template bool test_redux(const Xpr&, int traversal, int unrolling) { @@ -86,6 +95,15 @@ void test_vectorization_logic() VERIFY(test_assign(MatrixXf(10,10),MatrixXf(20,20).block(10,10,2,3), SliceVectorizedTraversal,NoUnrolling)); + VERIFY((test_assign< + Map, Aligned, OuterStride<12> >, + Matrix + >(InnerVectorizedTraversal,CompleteUnrolling))); + + VERIFY((test_assign< + Map, Aligned, InnerStride<12> >, + Matrix + >(DefaultTraversal,CompleteUnrolling))); VERIFY(test_redux(VectorXf(10), LinearVectorizedTraversal,NoUnrolling)); diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index c37628ed8..3c4d85d3b 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -173,7 +173,7 @@ class FFT template inline - void fwd( MatrixBase & dst, const MatrixBase & src) + void fwd( MatrixBase & dst, const MatrixBase & src,int nfft=-1) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) @@ -183,16 +183,25 @@ class FFT EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) - if ( NumTraits< typename InputDerived::Scalar >::IsComplex == 0 && HasFlag(HalfSpectrum) ) - dst.derived().resize( (src.size()>>1)+1); - else - dst.derived().resize(src.size()); + if (nfft<1) + nfft = src.size(); - if (src.stride() != 1) { - Matrix tmp = src; - fwd( &dst[0],&tmp[0],src.size() ); + if ( NumTraits< typename InputDerived::Scalar >::IsComplex == 0 && HasFlag(HalfSpectrum) ) + dst.derived().resize( (nfft>>1)+1); + else + dst.derived().resize(nfft); + + if ( src.stride() != 1 || src.size() < nfft ) { + Matrix tmp; + if (src.size() & dst, const MatrixBase & src, int nfft=-1) { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time - EIGEN_STATIC_ASSERT((ei_is_same_type::ret), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) + typedef typename ComplexDerived::Scalar src_type; + typedef typename OutputDerived::Scalar dst_type; + const bool realfft= (NumTraits::IsComplex == 0); + EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time + EIGEN_STATIC_ASSERT((ei_is_same_type::ret), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) - if (nfft<1) { - nfft = ( NumTraits::IsComplex == 0 && HasFlag(HalfSpectrum) ) ? 2*(src.size()-1) : src.size(); - } - dst.derived().resize( nfft ); - if (src.stride() != 1) { - // if the vector is strided, then we need to copy it to a packed temporary - Matrix tmp = src; - inv( &dst[0],&tmp[0], nfft); + if (nfft<1) { //automatic FFT size determination + if ( realfft && HasFlag(HalfSpectrum) ) + nfft = 2*(src.size()-1); //assume even fft size + else + nfft = src.size(); + } + dst.derived().resize( nfft ); + + // check for nfft that does not fit the input data size + int resize_input= ( realfft && HasFlag(HalfSpectrum) ) + ? ( (nfft/2+1) - src.size() ) + : ( nfft - src.size() ); + + if ( src.stride() != 1 || resize_input ) { + // if the vector is strided, then we need to copy it to a packed temporary + Matrix tmp; + if ( resize_input ) { + size_t ncopy = min(src.size(),src.size() + resize_input); + tmp.setZero(src.size() + resize_input); + if ( realfft && HasFlag(HalfSpectrum) ) { + // pad at the Nyquist bin + tmp.head(ncopy) = src.head(ncopy); + tmp(ncopy-1) = real(tmp(ncopy-1)); // enforce real-only Nyquist bin + }else{ + size_t nhead,ntail; + nhead = 1+ncopy/2-1; // range [0:pi) + ntail = ncopy/2-1; // range (-pi:0) + tmp.head(nhead) = src.head(nhead); + tmp.tail(ntail) = src.tail(ntail); + if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it + tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*src_type(.5); + }else{ // expanding -- split the old Nyquist bin into two halves + tmp(nhead) = src(nhead) * src_type(.5); + tmp(tmp.size()-nhead) = tmp(nhead); + } + } }else{ - inv( &dst[0],&src[0], nfft); + tmp = src; } + inv( &dst[0],&tmp[0], nfft); + }else{ + inv( &dst[0],&src[0], nfft); + } } template diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h index 12322a256..e8069154c 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h @@ -178,9 +178,9 @@ class MatrixFunction * * This is morally a \c static \c const \c Scalar, but only * integers can be static constant class members in C++. The - * separation constant is set to 0.01, a value taken from the + * separation constant is set to 0.1, a value taken from the * paper by Davies and Higham. */ - static const RealScalar separation() { return static_cast(0.01); } + static const RealScalar separation() { return static_cast(0.1); } }; /** \brief Constructor. @@ -492,14 +492,12 @@ typename MatrixFunction::DynMatrixType MatrixFunction class MatrixFunctionReturnValue : public ReturnByValue > { - private: + public: typedef typename ei_traits::Scalar Scalar; typedef typename ei_stem_function::type StemFunction; - public: - - /** \brief Constructor. + /** \brief Constructor. * * \param[in] A %Matrix (expression) forming the argument of the * matrix function. diff --git a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h index 35dc332e0..d75b1407c 100644 --- a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +++ b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h @@ -194,6 +194,8 @@ template HybridNonLinearSolverSpace::Status HybridNonLinearSolver::solveOneStep(FVectorType &x) { + assert(x.size()==n); // check the caller is not cheating us + int j; std::vector > v_givens(n), w_givens(n); @@ -350,6 +352,8 @@ HybridNonLinearSolverSpace::Status HybridNonLinearSolver::solve(FVectorType &x) { HybridNonLinearSolverSpace::Status status = solveInit(x); + if (status==HybridNonLinearSolverSpace::ImproperInputParameters) + return status; while (status==HybridNonLinearSolverSpace::Running) status = solveOneStep(x); return status; @@ -429,6 +433,8 @@ template HybridNonLinearSolverSpace::Status HybridNonLinearSolver::solveNumericalDiffOneStep(FVectorType &x) { + assert(x.size()==n); // check the caller is not cheating us + int j; std::vector > v_givens(n), w_givens(n); @@ -587,6 +593,8 @@ HybridNonLinearSolverSpace::Status HybridNonLinearSolver::solveNumericalDiff(FVectorType &x) { HybridNonLinearSolverSpace::Status status = solveNumericalDiffInit(x); + if (status==HybridNonLinearSolverSpace::ImproperInputParameters) + return status; while (status==HybridNonLinearSolverSpace::Running) status = solveNumericalDiffOneStep(x); return status; diff --git a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h index 8bae1e131..f99366bbc 100644 --- a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +++ b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h @@ -161,6 +161,8 @@ LevenbergMarquardtSpace::Status LevenbergMarquardt::minimize(FVectorType &x) { LevenbergMarquardtSpace::Status status = minimizeInit(x); + if (status==LevenbergMarquardtSpace::ImproperInputParameters) + return status; do { status = minimizeOneStep(x); } while (status==LevenbergMarquardtSpace::Running); @@ -214,7 +216,7 @@ template LevenbergMarquardtSpace::Status LevenbergMarquardt::minimizeOneStep(FVectorType &x) { - int j; + assert(x.size()==n); // check the caller is not cheating us /* calculate the jacobian matrix. */ int df_ret = functor.df(x, fjac); @@ -235,7 +237,7 @@ LevenbergMarquardt::minimizeOneStep(FVectorType &x) /* to the norms of the columns of the initial jacobian. */ if (iter == 1) { if (!useExternalScaling) - for (j = 0; j < n; ++j) + for (int j = 0; j < n; ++j) diag[j] = (wa2[j]==0.)? 1. : wa2[j]; /* on the first iteration, calculate the norm of the scaled x */ @@ -255,7 +257,7 @@ LevenbergMarquardt::minimizeOneStep(FVectorType &x) /* compute the norm of the scaled gradient. */ gnorm = 0.; if (fnorm != 0.) - for (j = 0; j < n; ++j) + for (int j = 0; j < n; ++j) if (wa2[permutation.indices()[j]] != 0.) gnorm = std::max(gnorm, ei_abs( fjac.col(j).head(j+1).dot(qtf.head(j+1)/fnorm) / wa2[permutation.indices()[j]])); @@ -431,6 +433,8 @@ template LevenbergMarquardtSpace::Status LevenbergMarquardt::minimizeOptimumStorageOneStep(FVectorType &x) { + assert(x.size()==n); // check the caller is not cheating us + int i, j; bool sing; @@ -606,6 +610,8 @@ LevenbergMarquardtSpace::Status LevenbergMarquardt::minimizeOptimumStorage(FVectorType &x) { LevenbergMarquardtSpace::Status status = minimizeOptimumStorageInit(x); + if (status==LevenbergMarquardtSpace::ImproperInputParameters) + return status; do { status = minimizeOptimumStorageOneStep(x); } while (status==LevenbergMarquardtSpace::Running); diff --git a/unsupported/doc/examples/MatrixFunction.cpp b/unsupported/doc/examples/MatrixFunction.cpp index 075fe7361..9b594cf39 100644 --- a/unsupported/doc/examples/MatrixFunction.cpp +++ b/unsupported/doc/examples/MatrixFunction.cpp @@ -18,5 +18,5 @@ int main() std::cout << "The matrix A is:\n" << A << "\n\n"; std::cout << "The matrix exponential of A is:\n" - << ei_matrix_function(A, expfn) << "\n\n"; + << ei_matrix_function(A, expfn) << "\n\n"; } diff --git a/unsupported/test/NonLinearOptimization.cpp b/unsupported/test/NonLinearOptimization.cpp index 7aea7b361..e68745ad1 100644 --- a/unsupported/test/NonLinearOptimization.cpp +++ b/unsupported/test/NonLinearOptimization.cpp @@ -172,9 +172,9 @@ void testLmder1() info = lm.lmder1(x); // check return value - VERIFY( 1 == info); - VERIFY(lm.nfev==6); - VERIFY(lm.njev==5); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 6); + VERIFY_IS_EQUAL(lm.njev, 5); // check norm VERIFY_IS_APPROX(lm.fvec.blueNorm(), 0.09063596); @@ -201,9 +201,9 @@ void testLmder() info = lm.minimize(x); // check return values - VERIFY( 1 == info); - VERIFY(lm.nfev==6); - VERIFY(lm.njev==5); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 6); + VERIFY_IS_EQUAL(lm.njev, 5); // check norm fnorm = lm.fvec.blueNorm(); @@ -286,9 +286,9 @@ void testHybrj1() info = solver.hybrj1(x); // check return value - VERIFY( 1 == info); - VERIFY(solver.nfev==11); - VERIFY(solver.njev==1); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(solver.nfev, 11); + VERIFY_IS_EQUAL(solver.njev, 1); // check norm VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08); @@ -321,9 +321,9 @@ void testHybrj() info = solver.solve(x); // check return value - VERIFY( 1 == info); - VERIFY(solver.nfev==11); - VERIFY(solver.njev==1); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(solver.nfev, 11); + VERIFY_IS_EQUAL(solver.njev, 1); // check norm VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08); @@ -375,8 +375,8 @@ void testHybrd1() info = solver.hybrd1(x); // check return value - VERIFY( 1 == info); - VERIFY(solver.nfev==20); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(solver.nfev, 20); // check norm VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08); @@ -406,8 +406,8 @@ void testHybrd() info = solver.solveNumericalDiff(x); // check return value - VERIFY( 1 == info); - VERIFY(solver.nfev==14); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(solver.nfev, 14); // check norm VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08); @@ -477,9 +477,9 @@ void testLmstr1() info = lm.lmstr1(x); // check return value - VERIFY( 1 == info); - VERIFY(lm.nfev==6); - VERIFY(lm.njev==5); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 6); + VERIFY_IS_EQUAL(lm.njev, 5); // check norm VERIFY_IS_APPROX(lm.fvec.blueNorm(), 0.09063596); @@ -506,9 +506,9 @@ void testLmstr() info = lm.minimizeOptimumStorage(x); // check return values - VERIFY( 1 == info); - VERIFY(lm.nfev==6); - VERIFY(lm.njev==5); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 6); + VERIFY_IS_EQUAL(lm.njev, 5); // check norm fnorm = lm.fvec.blueNorm(); @@ -562,8 +562,8 @@ void testLmdif1() info = LevenbergMarquardt::lmdif1(functor, x, &nfev); // check return value - VERIFY( 1 == info); - VERIFY(nfev==26); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(nfev, 26); // check norm functor(x, fvec); @@ -593,8 +593,8 @@ void testLmdif() info = lm.minimize(x); // check return values - VERIFY( 1 == info); - VERIFY(lm.nfev==26); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 26); // check norm fnorm = lm.fvec.blueNorm(); @@ -678,9 +678,9 @@ void testNistChwirut2(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 10 == lm.nfev); - VERIFY( 8 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 10); + VERIFY_IS_EQUAL(lm.njev, 8); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.1304802941E+02); // check x @@ -699,9 +699,9 @@ void testNistChwirut2(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 7 == lm.nfev); - VERIFY( 6 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 7); + VERIFY_IS_EQUAL(lm.njev, 6); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.1304802941E+02); // check x @@ -758,9 +758,9 @@ void testNistMisra1a(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 19 == lm.nfev); - VERIFY( 15 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 19); + VERIFY_IS_EQUAL(lm.njev, 15); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.2455138894E-01); // check x @@ -775,9 +775,9 @@ void testNistMisra1a(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 5 == lm.nfev); - VERIFY( 4 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 5); + VERIFY_IS_EQUAL(lm.njev, 4); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.2455138894E-01); // check x @@ -844,19 +844,19 @@ void testNistHahn1(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 11== lm.nfev); - VERIFY( 10== lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 11); + VERIFY_IS_EQUAL(lm.njev, 10); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.5324382854E+00); // check x - VERIFY_IS_APPROX(x[0], 1.0776351733E+00 ); - VERIFY_IS_APPROX(x[1],-1.2269296921E-01 ); - VERIFY_IS_APPROX(x[2], 4.0863750610E-03 ); + VERIFY_IS_APPROX(x[0], 1.0776351733E+00); + VERIFY_IS_APPROX(x[1],-1.2269296921E-01); + VERIFY_IS_APPROX(x[2], 4.0863750610E-03); VERIFY_IS_APPROX(x[3],-1.426264e-06); // shoulde be : -1.4262662514E-06 - VERIFY_IS_APPROX(x[4],-5.7609940901E-03 ); - VERIFY_IS_APPROX(x[5], 2.4053735503E-04 ); - VERIFY_IS_APPROX(x[6],-1.2314450199E-07 ); + VERIFY_IS_APPROX(x[4],-5.7609940901E-03); + VERIFY_IS_APPROX(x[5], 2.4053735503E-04); + VERIFY_IS_APPROX(x[6],-1.2314450199E-07); /* * Second try @@ -866,9 +866,9 @@ void testNistHahn1(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 11 == lm.nfev); - VERIFY( 10 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 11); + VERIFY_IS_EQUAL(lm.njev, 10); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.5324382854E+00); // check x @@ -876,7 +876,7 @@ void testNistHahn1(void) VERIFY_IS_APPROX(x[1], -0.1226933); // should be : -1.2269296921E-01 VERIFY_IS_APPROX(x[2], 0.004086383); // should be : 4.0863750610E-03 VERIFY_IS_APPROX(x[3], -1.426277e-06); // shoulde be : -1.4262662514E-06 - VERIFY_IS_APPROX(x[4],-5.7609940901E-03 ); + VERIFY_IS_APPROX(x[4],-5.7609940901E-03); VERIFY_IS_APPROX(x[5], 0.00024053772); // should be : 2.4053735503E-04 VERIFY_IS_APPROX(x[6], -1.231450e-07); // should be : -1.2314450199E-07 @@ -930,9 +930,9 @@ void testNistMisra1d(void) info = lm.minimize(x); // check return value - VERIFY( 3 == info); - VERIFY( 9 == lm.nfev); - VERIFY( 7 == lm.njev); + VERIFY_IS_EQUAL(info, 3); + VERIFY_IS_EQUAL(lm.nfev, 9); + VERIFY_IS_EQUAL(lm.njev, 7); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6419295283E-02); // check x @@ -947,9 +947,9 @@ void testNistMisra1d(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 4 == lm.nfev); - VERIFY( 3 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 4); + VERIFY_IS_EQUAL(lm.njev, 3); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6419295283E-02); // check x @@ -1008,18 +1008,18 @@ void testNistLanczos1(void) info = lm.minimize(x); // check return value - VERIFY( 2 == info); - VERIFY( 79 == lm.nfev); - VERIFY( 72 == lm.njev); + VERIFY_IS_EQUAL(info, 2); + VERIFY_IS_EQUAL(lm.nfev, 79); + VERIFY_IS_EQUAL(lm.njev, 72); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats // check x - VERIFY_IS_APPROX(x[0], 9.5100000027E-02 ); - VERIFY_IS_APPROX(x[1], 1.0000000001E+00 ); - VERIFY_IS_APPROX(x[2], 8.6070000013E-01 ); - VERIFY_IS_APPROX(x[3], 3.0000000002E+00 ); - VERIFY_IS_APPROX(x[4], 1.5575999998E+00 ); - VERIFY_IS_APPROX(x[5], 5.0000000001E+00 ); + VERIFY_IS_APPROX(x[0], 9.5100000027E-02); + VERIFY_IS_APPROX(x[1], 1.0000000001E+00); + VERIFY_IS_APPROX(x[2], 8.6070000013E-01); + VERIFY_IS_APPROX(x[3], 3.0000000002E+00); + VERIFY_IS_APPROX(x[4], 1.5575999998E+00); + VERIFY_IS_APPROX(x[5], 5.0000000001E+00); /* * Second try @@ -1029,18 +1029,18 @@ void testNistLanczos1(void) info = lm.minimize(x); // check return value - VERIFY( 2 == info); - VERIFY( 9 == lm.nfev); - VERIFY( 8 == lm.njev); + VERIFY_IS_EQUAL(info, 2); + VERIFY_IS_EQUAL(lm.nfev, 9); + VERIFY_IS_EQUAL(lm.njev, 8); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats // check x - VERIFY_IS_APPROX(x[0], 9.5100000027E-02 ); - VERIFY_IS_APPROX(x[1], 1.0000000001E+00 ); - VERIFY_IS_APPROX(x[2], 8.6070000013E-01 ); - VERIFY_IS_APPROX(x[3], 3.0000000002E+00 ); - VERIFY_IS_APPROX(x[4], 1.5575999998E+00 ); - VERIFY_IS_APPROX(x[5], 5.0000000001E+00 ); + VERIFY_IS_APPROX(x[0], 9.5100000027E-02); + VERIFY_IS_APPROX(x[1], 1.0000000001E+00); + VERIFY_IS_APPROX(x[2], 8.6070000013E-01); + VERIFY_IS_APPROX(x[3], 3.0000000002E+00); + VERIFY_IS_APPROX(x[4], 1.5575999998E+00); + VERIFY_IS_APPROX(x[5], 5.0000000001E+00); } @@ -1094,9 +1094,9 @@ void testNistRat42(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 10 == lm.nfev); - VERIFY( 8 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 10); + VERIFY_IS_EQUAL(lm.njev, 8); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.0565229338E+00); // check x @@ -1112,9 +1112,9 @@ void testNistRat42(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 6 == lm.nfev); - VERIFY( 5 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 6); + VERIFY_IS_EQUAL(lm.njev, 5); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.0565229338E+00); // check x @@ -1172,9 +1172,9 @@ void testNistMGH10(void) info = lm.minimize(x); // check return value - VERIFY( 2 == info); - VERIFY( 284 == lm.nfev); - VERIFY( 249 == lm.njev); + VERIFY_IS_EQUAL(info, 2); + VERIFY_IS_EQUAL(lm.nfev, 284 ); + VERIFY_IS_EQUAL(lm.njev, 249 ); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7945855171E+01); // check x @@ -1190,9 +1190,9 @@ void testNistMGH10(void) info = lm.minimize(x); // check return value - VERIFY( 3 == info); - VERIFY( 126 == lm.nfev); - VERIFY( 116 == lm.njev); + VERIFY_IS_EQUAL(info, 3); + VERIFY_IS_EQUAL(lm.nfev, 126); + VERIFY_IS_EQUAL(lm.njev, 116); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7945855171E+01); // check x @@ -1251,9 +1251,9 @@ void testNistBoxBOD(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 31 == lm.nfev); - VERIFY( 25 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 31); + VERIFY_IS_EQUAL(lm.njev, 25); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03); // check x @@ -1271,9 +1271,9 @@ void testNistBoxBOD(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 15 == lm.nfev); - VERIFY( 14 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 15 ); + VERIFY_IS_EQUAL(lm.njev, 14 ); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03); // check x @@ -1333,9 +1333,9 @@ void testNistMGH17(void) info = lm.minimize(x); // check return value - VERIFY( 2 == info); - VERIFY( 602 == lm.nfev); - VERIFY( 545 == lm.njev); + VERIFY_IS_EQUAL(info, 2); + VERIFY_IS_EQUAL(lm.nfev, 602 ); + VERIFY_IS_EQUAL(lm.njev, 545 ); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05); // check x @@ -1354,9 +1354,9 @@ void testNistMGH17(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 18 == lm.nfev); - VERIFY( 15 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 18); + VERIFY_IS_EQUAL(lm.njev, 15); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05); // check x @@ -1420,9 +1420,9 @@ void testNistMGH09(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 490 == lm.nfev); - VERIFY( 376 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 490 ); + VERIFY_IS_EQUAL(lm.njev, 376 ); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 3.0750560385E-04); // check x @@ -1440,9 +1440,9 @@ void testNistMGH09(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 18 == lm.nfev); - VERIFY( 16 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 18); + VERIFY_IS_EQUAL(lm.njev, 16); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 3.0750560385E-04); // check x @@ -1503,9 +1503,9 @@ void testNistBennett5(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 758 == lm.nfev); - VERIFY( 744 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 758); + VERIFY_IS_EQUAL(lm.njev, 744); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.2404744073E-04); // check x @@ -1521,9 +1521,9 @@ void testNistBennett5(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 203 == lm.nfev); - VERIFY( 192 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 203); + VERIFY_IS_EQUAL(lm.njev, 192); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.2404744073E-04); // check x @@ -1591,9 +1591,9 @@ void testNistThurber(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 39 == lm.nfev); - VERIFY( 36== lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 39); + VERIFY_IS_EQUAL(lm.njev, 36); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6427082397E+03); // check x @@ -1616,9 +1616,9 @@ void testNistThurber(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 29 == lm.nfev); - VERIFY( 28 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 29); + VERIFY_IS_EQUAL(lm.njev, 28); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6427082397E+03); // check x @@ -1683,9 +1683,9 @@ void testNistRat43(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 27 == lm.nfev); - VERIFY( 20 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 27); + VERIFY_IS_EQUAL(lm.njev, 20); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7864049080E+03); // check x @@ -1705,9 +1705,9 @@ void testNistRat43(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 9 == lm.nfev); - VERIFY( 8 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 9); + VERIFY_IS_EQUAL(lm.njev, 8); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7864049080E+03); // check x @@ -1768,9 +1768,9 @@ void testNistEckerle4(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 18 == lm.nfev); - VERIFY( 15 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 18); + VERIFY_IS_EQUAL(lm.njev, 15); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4635887487E-03); // check x @@ -1786,9 +1786,9 @@ void testNistEckerle4(void) info = lm.minimize(x); // check return value - VERIFY( 1 == info); - VERIFY( 7 == lm.nfev); - VERIFY( 6 == lm.njev); + VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(lm.nfev, 7); + VERIFY_IS_EQUAL(lm.njev, 6); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4635887487E-03); // check x diff --git a/unsupported/test/matrix_exponential.cpp b/unsupported/test/matrix_exponential.cpp index 86e942edb..61f30334d 100644 --- a/unsupported/test/matrix_exponential.cpp +++ b/unsupported/test/matrix_exponential.cpp @@ -133,7 +133,7 @@ void randomTest(const MatrixType& m, double tol) m1 = MatrixType::Random(rows, cols); m2 = ei_matrix_function(m1, expfn) * ei_matrix_function(-m1, expfn); - std::cout << "randomTest: error funm = " << relerr(identity, m2 * m3); + std::cout << "randomTest: error funm = " << relerr(identity, m2); VERIFY(identity.isApprox(m2, static_cast(tol))); m2 = ei_matrix_exponential(m1) * ei_matrix_exponential(-m1); diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp index 4ff6d7f1e..e40af7b4e 100644 --- a/unsupported/test/matrix_function.cpp +++ b/unsupported/test/matrix_function.cpp @@ -25,6 +25,17 @@ #include "main.h" #include +// Variant of VERIFY_IS_APPROX which uses absolute error instead of +// relative error. +#define VERIFY_IS_APPROX_ABS(a, b) VERIFY(test_isApprox_abs(a, b)) + +template +inline bool test_isApprox_abs(const Type1& a, const Type2& b) +{ + return ((a-b).array().abs() < test_precision()).all(); +} + + // Returns a matrix with eigenvalues clustered around 0, 1 and 2. template MatrixType randomMatrixWithRealEivals(const int size) @@ -37,7 +48,8 @@ MatrixType randomMatrixWithRealEivals(const int size) + ei_random() * Scalar(RealScalar(0.01)); } MatrixType A = MatrixType::Random(size, size); - return A.inverse() * diag * A; + HouseholderQR QRofA(A); + return QRofA.householderQ().inverse() * diag * QRofA.householderQ(); } template ::Scalar>::IsComplex> @@ -69,7 +81,8 @@ struct randomMatrixWithImagEivals } } MatrixType A = MatrixType::Random(size, size); - return A.inverse() * diag * A; + HouseholderQR QRofA(A); + return QRofA.householderQ().inverse() * diag * QRofA.householderQ(); } }; @@ -88,10 +101,12 @@ struct randomMatrixWithImagEivals + ei_random() * Scalar(RealScalar(0.01)); } MatrixType A = MatrixType::Random(size, size); - return A.inverse() * diag * A; + HouseholderQR QRofA(A); + return QRofA.householderQ().inverse() * diag * QRofA.householderQ(); } }; + template void testMatrixExponential(const MatrixType& A) { @@ -99,50 +114,45 @@ void testMatrixExponential(const MatrixType& A) typedef typename NumTraits::Real RealScalar; typedef std::complex ComplexScalar; - for (int i = 0; i < g_repeat; i++) { - VERIFY_IS_APPROX(ei_matrix_exponential(A), - ei_matrix_function(A, StdStemFunctions::exp)); - } + VERIFY_IS_APPROX(ei_matrix_exponential(A), + ei_matrix_function(A, StdStemFunctions::exp)); } template void testHyperbolicFunctions(const MatrixType& A) { - for (int i = 0; i < g_repeat; i++) { - MatrixType sinhA = ei_matrix_sinh(A); - MatrixType coshA = ei_matrix_cosh(A); - MatrixType expA = ei_matrix_exponential(A); - VERIFY_IS_APPROX(sinhA, (expA - expA.inverse())/2); - VERIFY_IS_APPROX(coshA, (expA + expA.inverse())/2); - } + // Need to use absolute error because of possible cancellation when + // adding/subtracting expA and expmA. + MatrixType expA = ei_matrix_exponential(A); + MatrixType expmA = ei_matrix_exponential(-A); + VERIFY_IS_APPROX_ABS(ei_matrix_sinh(A), (expA - expmA) / 2); + VERIFY_IS_APPROX_ABS(ei_matrix_cosh(A), (expA + expmA) / 2); } template void testGonioFunctions(const MatrixType& A) { - typedef ei_traits Traits; - typedef typename Traits::Scalar Scalar; + typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; typedef std::complex ComplexScalar; - typedef Matrix ComplexMatrix; + typedef Matrix ComplexMatrix; ComplexScalar imagUnit(0,1); ComplexScalar two(2,0); - for (int i = 0; i < g_repeat; i++) { - ComplexMatrix Ac = A.template cast(); - - ComplexMatrix exp_iA = ei_matrix_exponential(imagUnit * Ac); - - MatrixType sinA = ei_matrix_sin(A); - ComplexMatrix sinAc = sinA.template cast(); - VERIFY_IS_APPROX(sinAc, (exp_iA - exp_iA.inverse()) / (two*imagUnit)); - - MatrixType cosA = ei_matrix_cos(A); - ComplexMatrix cosAc = cosA.template cast(); - VERIFY_IS_APPROX(cosAc, (exp_iA + exp_iA.inverse()) / 2); - } + ComplexMatrix Ac = A.template cast(); + + ComplexMatrix exp_iA = ei_matrix_exponential(imagUnit * Ac); + ComplexMatrix exp_miA = ei_matrix_exponential(-imagUnit * Ac); + + MatrixType sinA = ei_matrix_sin(A); + ComplexMatrix sinAc = sinA.template cast(); + VERIFY_IS_APPROX_ABS(sinAc, (exp_iA - exp_miA) / (two*imagUnit)); + + MatrixType cosA = ei_matrix_cos(A); + ComplexMatrix cosAc = cosA.template cast(); + VERIFY_IS_APPROX_ABS(cosAc, (exp_iA + exp_miA) / 2); } template