From 3930c9b0869ae2244fbacaba0e83217accfac6c6 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Thu, 28 Feb 2013 19:31:03 +0100
Subject: [PATCH] Fix "routine is both "inline" and "noinline"" warnings

---
 Eigen/src/Core/GeneralProduct.h                |  62 ++-
 .../Core/products/GeneralBlockPanelKernel.h    | 322 +++++++-------
 Eigen/src/Core/products/GeneralMatrixMatrix.h  |   3 +-
 Eigen/src/Core/products/GeneralMatrixVector.h  |  23 +-
 .../Core/products/GeneralMatrixVector_MKL.h    |   6 +-
 .../Core/products/SelfadjointMatrixMatrix.h    |  24 +-
 .../products/SelfadjointMatrixMatrix_MKL.h     |  10 +-
 .../Core/products/SelfadjointMatrixVector.h    |  10 +-
 .../products/SelfadjointMatrixVector_MKL.h     |   4 +-
 .../Core/products/TriangularMatrixMatrix.h     |  32 +-
 .../products/TriangularMatrixMatrix_MKL.h      |   4 +-
 .../Core/products/TriangularMatrixVector.h     |  20 +-
 .../products/TriangularMatrixVector_MKL.h      |  12 +-
 .../Core/products/TriangularSolverMatrix.h     |  18 +-
 .../products/TriangularSolverMatrix_MKL.h      |   4 +-
 Eigen/src/SparseCore/SparseMatrix.h            | 396 +++++++++---------
 Eigen/src/SparseCore/SparseVector.h            |  52 +--
 17 files changed, 560 insertions(+), 442 deletions(-)

diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 086eac32d..4e6448353 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -222,7 +222,29 @@ class GeneralProduct
 ***********************************************************************/

 namespace internal {
-template<int StorageOrder> struct outer_product_selector;
+
+// Column major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
+{
+  typedef typename Dest::Index Index;
+  // FIXME make sure lhs is sequentially stored
+  // FIXME not very good if rhs is real and lhs complex while alpha is real too
+  const Index cols = dest.cols();
+  for (Index j=0; j<cols; ++j)
+    func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
+}
+
+// Row major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
+  typedef typename Dest::Index Index;
+  // FIXME make sure rhs is sequentially stored
+  // FIXME not very good if lhs is real and rhs complex while alpha is real too
+  const Index rows = dest.rows();
+  for (Index i=0; i<rows; ++i)
+    func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
+}

 template<typename Lhs, typename Rhs>
 struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
@@ -235,6 +257,8 @@ template<typename Lhs, typename Rhs>
 class GeneralProduct<Lhs,Rhs,OuterProduct>
   : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
 {
+    template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+
   public:
     EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)

@@ -257,53 +281,25 @@ class GeneralProduct

     template<typename Dest>
     inline void evalTo(Dest& dest) const {
-      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, set());
+      internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
     }

     template<typename Dest>
     inline void addTo(Dest& dest) const {
-      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, add());
+      internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
     }

     template<typename Dest>
     inline void subTo(Dest& dest) const {
-      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, sub());
+      internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
     }

     template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
     {
-      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ?
RowMajor : ColMajor>::run(*this, dest, adds(alpha)); + internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor()); } }; -namespace internal { - -template<> struct outer_product_selector { - template - static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, const Func& func) { - typedef typename Dest::Index Index; - // FIXME make sure lhs is sequentially stored - // FIXME not very good if rhs is real and lhs complex while alpha is real too - const Index cols = dest.cols(); - for (Index j=0; j struct outer_product_selector { - template - static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, const Func& func) { - typedef typename Dest::Index Index; - // FIXME make sure rhs is sequentially stored - // FIXME not very good if lhs is real and rhs complex while alpha is real too - const Index rows = dest.rows(); - for (Index i=0; i +EIGEN_DONT_INLINE +void gebp_kernel + ::operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha, + Index strideA, Index strideB, Index offsetA, Index offsetB, RhsScalar* unpackedB) { Traits traits; @@ -1089,7 +1096,7 @@ EIGEN_ASM_COMMENT("mybegin4"); } } } -}; + #undef CJMADD @@ -1110,81 +1117,84 @@ EIGEN_ASM_COMMENT("mybegin4"); template struct gemm_pack_lhs { - EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, - Index stride=0, Index offset=0) - { - typedef typename packet_traits::type Packet; - enum { PacketSize = packet_traits::size }; - - EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS"); - eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); - eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) ); - conj_if::IsComplex && Conjugate> cj; - const_blas_data_mapper lhs(_lhs,lhsStride); - Index count = 0; - Index peeled_mc = (rows/Pack1)*Pack1; - for(Index i=0; i=1*PacketSize) A = ploadu(&lhs(i+0*PacketSize, k)); - if(Pack1>=2*PacketSize) B = ploadu(&lhs(i+1*PacketSize, k)); - if(Pack1>=3*PacketSize) C = ploadu(&lhs(i+2*PacketSize, k)); - if(Pack1>=4*PacketSize) D = ploadu(&lhs(i+3*PacketSize, k)); - if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; } - if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; } - if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; } - if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; } - } - } - else - { - for(Index k=0; k=Pack2) - { - if(PanelMode) count += Pack2*offset; - for(Index k=0; k +EIGEN_DONT_INLINE void gemm_pack_lhs + ::operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride, Index offset) +{ + typedef typename packet_traits::type Packet; + enum { PacketSize = packet_traits::size }; + + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS"); + eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) ); + conj_if::IsComplex && Conjugate> cj; + const_blas_data_mapper lhs(_lhs,lhsStride); + Index count = 0; + Index peeled_mc = (rows/Pack1)*Pack1; + for(Index i=0; i=1*PacketSize) A = ploadu(&lhs(i+0*PacketSize, k)); + if(Pack1>=2*PacketSize) B = ploadu(&lhs(i+1*PacketSize, k)); + if(Pack1>=3*PacketSize) C = ploadu(&lhs(i+2*PacketSize, 
k)); + if(Pack1>=4*PacketSize) D = ploadu(&lhs(i+3*PacketSize, k)); + if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; } + if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; } + if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; } + if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; } + } + } + else + { + for(Index k=0; k=Pack2) + { + if(PanelMode) count += Pack2*offset; + for(Index k=0; k { typedef typename packet_traits::type Packet; enum { PacketSize = packet_traits::size }; - EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, - Index stride=0, Index offset=0) - { - EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR"); - eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); - conj_if::IsComplex && Conjugate> cj; - Index packet_cols = (cols/nr) * nr; - Index count = 0; - for(Index j2=0; j2 +EIGEN_DONT_INLINE void gemm_pack_rhs + ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset) +{ + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR"); + eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + conj_if::IsComplex && Conjugate> cj; + Index packet_cols = (cols/nr) * nr; + Index count = 0; + for(Index j2=0; j2 struct gemm_pack_rhs { enum { PacketSize = packet_traits::size }; - EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, - Index stride=0, Index offset=0) - { - EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR"); - eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); - conj_if::IsComplex && Conjugate> cj; - Index packet_cols = (cols/nr) * nr; - Index count = 0; - for(Index j2=0; j2 +EIGEN_DONT_INLINE void gemm_pack_rhs + ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset) +{ + EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR"); + eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); + conj_if::IsComplex && Conjugate> cj; + Index packet_cols = (cols/nr) * nr; + Index count = 0; + for(Index j2=0; j2 struct general_matrix_matrix_product { + typedef typename scalar_product_traits::ReturnType ResScalar; static void run(Index rows, Index cols, Index depth, const LhsScalar* _lhs, Index lhsStride, @@ -169,7 +170,6 @@ static void run(Index rows, Index cols, Index depth, // vertical panel which is, in practice, a very low number. pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols); - // For each mc x kc block of the lhs's vertical panel... 
// (==GEPP_VAR1) for(Index i2=0; i2::type RhsPacket; typedef typename conditional::type ResPacket; EIGEN_DONT_INLINE static void run( + Index rows, Index cols, + const LhsScalar* lhs, Index lhsStride, + const RhsScalar* rhs, Index rhsIncr, + ResScalar* res, Index + #ifdef EIGEN_INTERNAL_DEBUGGING + resIncr + #endif + , RhsScalar alpha); +}; + +template +EIGEN_DONT_INLINE void general_matrix_vector_product::run( Index rows, Index cols, const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsIncr, @@ -274,7 +286,6 @@ EIGEN_DONT_INLINE static void run( } while(Vectorizable); #undef _EIGEN_ACCUMULATE_PACKETS } -}; /* Optimized row-major matrix * vector product: * This algorithm processes 4 rows at onces that allows to both reduce @@ -308,6 +319,15 @@ typedef typename conditional::type RhsPacket; typedef typename conditional::type ResPacket; EIGEN_DONT_INLINE static void run( + Index rows, Index cols, + const LhsScalar* lhs, Index lhsStride, + const RhsScalar* rhs, Index rhsIncr, + ResScalar* res, Index resIncr, + ResScalar alpha); +}; + +template +EIGEN_DONT_INLINE void general_matrix_vector_product::run( Index rows, Index cols, const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsIncr, @@ -545,7 +565,6 @@ EIGEN_DONT_INLINE static void run( #undef _EIGEN_ACCUMULATE_PACKETS } -}; } // end namespace internal diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h index e9de6af3e..1cb9fe6b5 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h +++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h @@ -53,7 +53,7 @@ struct general_matrix_vector_product_gemv : #define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \ template \ struct general_matrix_vector_product { \ -static EIGEN_DONT_INLINE void run( \ +static void run( \ Index rows, Index cols, \ const Scalar* lhs, Index lhsStride, \ const Scalar* rhs, Index rhsIncr, \ @@ -70,7 +70,7 @@ static EIGEN_DONT_INLINE void run( \ }; \ template \ struct general_matrix_vector_product { \ -static EIGEN_DONT_INLINE void run( \ +static void run( \ Index rows, Index cols, \ const Scalar* lhs, Index lhsStride, \ const Scalar* rhs, Index rhsIncr, \ @@ -92,7 +92,7 @@ struct general_matrix_vector_product_gemv GEMVVector;\ \ -static EIGEN_DONT_INLINE void run( \ +static void run( \ Index rows, Index cols, \ const EIGTYPE* lhs, Index lhsStride, \ const EIGTYPE* rhs, Index rhsIncr, \ diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index e4b21dedd..ee619df99 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -230,6 +230,17 @@ struct product_selfadjoint_matrix +EIGEN_DONT_INLINE void product_selfadjoint_matrix::run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, @@ -301,7 +312,6 @@ struct product_selfadjoint_matrix +EIGEN_DONT_INLINE void product_selfadjoint_matrix::run( Index rows, Index cols, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, @@ -353,7 +374,6 @@ struct product_selfadjoint_matrix \ {\ \ - static EIGEN_DONT_INLINE void run( \ + static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ @@ -98,7 +98,7 @@ template \ struct product_selfadjoint_matrix \ {\ - static EIGEN_DONT_INLINE void run( \ + static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* 
_rhs, Index rhsStride, \ @@ -174,7 +174,7 @@ template \ {\ \ - static EIGEN_DONT_INLINE void run( \ + static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ @@ -224,7 +224,7 @@ template \ struct product_selfadjoint_matrix \ {\ - static EIGEN_DONT_INLINE void run( \ + static void run( \ Index rows, Index cols, \ const EIGTYPE* _lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsStride, \ diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h index 6d736f01c..7c9e3fc98 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -28,6 +28,15 @@ struct selfadjoint_matrix_vector_product { static EIGEN_DONT_INLINE void run( + Index size, + const Scalar* lhs, Index lhsStride, + const Scalar* _rhs, Index rhsIncr, + Scalar* res, + Scalar alpha); +}; + +template +EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product::run( Index size, const Scalar* lhs, Index lhsStride, const Scalar* _rhs, Index rhsIncr, @@ -153,7 +162,6 @@ static EIGEN_DONT_INLINE void run( res[j] += alpha * t2; } } -}; } // end namespace internal diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h index f88d483b6..86684b66d 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h @@ -50,7 +50,7 @@ struct selfadjoint_matrix_vector_product_symv : #define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \ template \ struct selfadjoint_matrix_vector_product { \ -static EIGEN_DONT_INLINE void run( \ +static void run( \ Index size, const Scalar* lhs, Index lhsStride, \ const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ enum {\ @@ -77,7 +77,7 @@ struct selfadjoint_matrix_vector_product_symv SYMVVector;\ \ -static EIGEN_DONT_INLINE void run( \ +static void run( \ Index size, const EIGTYPE* lhs, Index lhsStride, \ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ { \ diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index c4bc2aa6f..8110507b5 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -96,6 +96,19 @@ struct product_triangular_matrix_matrix& blocking); +}; + +template +EIGEN_DONT_INLINE void product_triangular_matrix_matrix::run( + Index _rows, Index _cols, Index _depth, + const Scalar* _lhs, Index lhsStride, + const Scalar* _rhs, Index rhsStride, + Scalar* res, Index resStride, const Scalar& alpha, level3_blocking& blocking) { // strip zeros @@ -203,15 +216,14 @@ struct product_triangular_matrix_matrix struct product_triangular_matrix_matrix + LhsStorageOrder,ConjugateLhs, + RhsStorageOrder,ConjugateRhs,ColMajor,Version> { typedef gebp_traits Traits; enum { @@ -225,6 +237,19 @@ struct product_triangular_matrix_matrix& blocking); +}; + +template +EIGEN_DONT_INLINE void product_triangular_matrix_matrix::run( + Index _rows, Index _cols, Index _depth, + const Scalar* _lhs, Index lhsStride, + const Scalar* _rhs, Index rhsStride, + Scalar* res, Index resStride, const Scalar& alpha, level3_blocking& blocking) { // strip zeros @@ -343,7 +368,6 @@ struct product_triangular_matrix_matrix +EIGEN_DONT_INLINE void triangular_matrix_vector_product + ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride, + const RhsScalar* _rhs, Index rhsIncr, 
ResScalar* _res, Index resIncr, const ResScalar& alpha) { static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; Index size = (std::min)(_rows,_cols); @@ -78,7 +84,6 @@ struct triangular_matrix_vector_product struct triangular_matrix_vector_product @@ -89,8 +94,14 @@ struct triangular_matrix_vector_product +EIGEN_DONT_INLINE void triangular_matrix_vector_product + ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride, + const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha) { static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH; Index diagSize = (std::min)(_rows,_cols); @@ -141,7 +152,6 @@ struct triangular_matrix_vector_product \ struct triangular_matrix_vector_product { \ - static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ + static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ triangular_matrix_vector_product_trmv::run( \ _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ @@ -58,7 +58,7 @@ struct triangular_matrix_vector_product \ struct triangular_matrix_vector_product { \ - static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ + static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ triangular_matrix_vector_product_trmv::run( \ _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ @@ -81,8 +81,8 @@ struct triangular_matrix_vector_product_trmv::run( \ @@ -166,8 +166,8 @@ struct triangular_matrix_vector_product_trmv::run( \ diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index a49ea3183..f103eae72 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -18,7 +18,7 @@ namespace internal { template struct triangular_solve_matrix { - static EIGEN_DONT_INLINE void run( + static void run( Index size, Index cols, const Scalar* tri, Index triStride, Scalar* _other, Index otherStride, @@ -39,6 +39,13 @@ template { static EIGEN_DONT_INLINE void run( + Index size, Index otherSize, + const Scalar* _tri, Index triStride, + Scalar* _other, Index otherStride, + level3_blocking& blocking); +}; +template +EIGEN_DONT_INLINE void triangular_solve_matrix::run( Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other, Index otherStride, @@ -173,7 +180,6 @@ struct triangular_solve_matrix { static EIGEN_DONT_INLINE void run( + Index size, Index otherSize, + const Scalar* _tri, Index triStride, + Scalar* _other, Index otherStride, + level3_blocking& blocking); +}; +template +EIGEN_DONT_INLINE void triangular_solve_matrix::run( Index size, Index otherSize, const Scalar* _tri, Index triStride, Scalar* _other, Index otherStride, @@ -308,7 +321,6 @@ struct triangular_solve_matrix& /*blocking*/) \ @@ -103,7 +103,7 @@ struct triangular_solve_matrix& /*blocking*/) \ diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 5ff01da28..be134d3d3 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -213,7 +213,7 @@ class SparseMatrix * inserted in increasing inner index order, and in O(nnz_j) for a random insertion. 
* */ - EIGEN_DONT_INLINE Scalar& insert(Index row, Index col) + Scalar& insert(Index row, Index col) { if(isCompressed()) { @@ -434,7 +434,7 @@ class SparseMatrix /** \internal * same as insert(Index,Index) except that the indices are given relative to the storage order */ - EIGEN_DONT_INLINE Scalar& insertByOuterInner(Index j, Index i) + Scalar& insertByOuterInner(Index j, Index i) { return insert(IsRowMajor ? j : i, IsRowMajor ? i : j); } @@ -711,62 +711,7 @@ class SparseMatrix #endif template - EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase& other) - { - const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); - if (needToTranspose) - { - // two passes algorithm: - // 1 - compute the number of coeffs per dest inner vector - // 2 - do the actual copy/eval - // Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed - typedef typename internal::nested::type OtherCopy; - typedef typename internal::remove_all::type _OtherCopy; - OtherCopy otherCopy(other.derived()); - - SparseMatrix dest(other.rows(),other.cols()); - Eigen::Map > (dest.m_outerIndex,dest.outerSize()).setZero(); - - // pass 1 - // FIXME the above copy could be merged with that pass - for (Index j=0; jswap(dest); - return *this; - } - else - { - if(other.isRValue()) - initAssignment(other.derived()); - // there is no special optimization - return Base::operator=(other.derived()); - } - } + EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase& other); friend std::ostream & operator << (std::ostream & s, const SparseMatrix& m) { @@ -836,111 +781,7 @@ protected: /** \internal * \sa insert(Index,Index) */ - EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col) - { - eigen_assert(isCompressed()); - - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - Index previousOuter = outer; - if (m_outerIndex[outer+1]==0) - { - // we start a new inner vector - while (previousOuter>=0 && m_outerIndex[previousOuter]==0) - { - m_outerIndex[previousOuter] = static_cast(m_data.size()); - --previousOuter; - } - m_outerIndex[outer+1] = m_outerIndex[outer]; - } - - // here we have to handle the tricky case where the outerIndex array - // starts with: [ 0 0 0 0 0 1 ...] and we are inserted in, e.g., - // the 2nd inner vector... 
- bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0)) - && (size_t(m_outerIndex[outer+1]) == m_data.size()); - - size_t startId = m_outerIndex[outer]; - // FIXME let's make sure sizeof(long int) == sizeof(size_t) - size_t p = m_outerIndex[outer+1]; - ++m_outerIndex[outer+1]; - - float reallocRatio = 1; - if (m_data.allocatedSize()<=m_data.size()) - { - // if there is no preallocated memory, let's reserve a minimum of 32 elements - if (m_data.size()==0) - { - m_data.reserve(32); - } - else - { - // we need to reallocate the data, to reduce multiple reallocations - // we use a smart resize algorithm based on the current filling ratio - // in addition, we use float to avoid integers overflows - float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1); - reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size()); - // furthermore we bound the realloc ratio to: - // 1) reduce multiple minor realloc when the matrix is almost filled - // 2) avoid to allocate too much memory when the matrix is almost empty - reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f); - } - } - m_data.resize(m_data.size()+1,reallocRatio); - - if (!isLastVec) - { - if (previousOuter==-1) - { - // oops wrong guess. - // let's correct the outer offsets - for (Index k=0; k<=(outer+1); ++k) - m_outerIndex[k] = 0; - Index k=outer+1; - while(m_outerIndex[k]==0) - m_outerIndex[k++] = 1; - while (k<=m_outerSize && m_outerIndex[k]!=0) - m_outerIndex[k++]++; - p = 0; - --k; - k = m_outerIndex[k]-1; - while (k>0) - { - m_data.index(k) = m_data.index(k-1); - m_data.value(k) = m_data.value(k-1); - k--; - } - } - else - { - // we are not inserting into the last inner vec - // update outer indices: - Index j = outer+2; - while (j<=m_outerSize && m_outerIndex[j]!=0) - m_outerIndex[j++]++; - --j; - // shift data of last vecs: - Index k = m_outerIndex[j]-1; - while (k>=Index(p)) - { - m_data.index(k) = m_data.index(k-1); - m_data.value(k) = m_data.value(k-1); - k--; - } - } - } - - while ( (p > startId) && (m_data.index(p-1) > inner) ) - { - m_data.index(p) = m_data.index(p-1); - m_data.value(p) = m_data.value(p-1); - --p; - } - - m_data.index(p) = inner; - return (m_data.value(p) = 0); - } + EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col); /** \internal * A vector object that is equal to 0 everywhere but v at the position i */ @@ -959,36 +800,7 @@ protected: /** \internal * \sa insert(Index,Index) */ - EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col) - { - eigen_assert(!isCompressed()); - - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? 
col : row; - - std::ptrdiff_t room = m_outerIndex[outer+1] - m_outerIndex[outer]; - std::ptrdiff_t innerNNZ = m_innerNonZeros[outer]; - if(innerNNZ>=room) - { - // this inner vector is full, we need to reallocate the whole buffer :( - reserve(SingletonVector(outer,std::max(2,innerNNZ))); - } - - Index startId = m_outerIndex[outer]; - Index p = startId + m_innerNonZeros[outer]; - while ( (p > startId) && (m_data.index(p-1) > inner) ) - { - m_data.index(p) = m_data.index(p-1); - m_data.value(p) = m_data.value(p-1); - --p; - } - eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exist, you must call coeffRef to this end"); - - m_innerNonZeros[outer]++; - - m_data.index(p) = inner; - return (m_data.value(p) = 0); - } + EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col); public: /** \internal @@ -1205,6 +1017,204 @@ void SparseMatrix::sumupDuplicates() m_data.resize(m_outerIndex[m_outerSize]); } +template +template +EIGEN_DONT_INLINE SparseMatrix& SparseMatrix::operator=(const SparseMatrixBase& other) +{ + const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); + if (needToTranspose) + { + // two passes algorithm: + // 1 - compute the number of coeffs per dest inner vector + // 2 - do the actual copy/eval + // Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed + typedef typename internal::nested::type OtherCopy; + typedef typename internal::remove_all::type _OtherCopy; + OtherCopy otherCopy(other.derived()); + + SparseMatrix dest(other.rows(),other.cols()); + Eigen::Map > (dest.m_outerIndex,dest.outerSize()).setZero(); + + // pass 1 + // FIXME the above copy could be merged with that pass + for (Index j=0; jswap(dest); + return *this; + } + else + { + if(other.isRValue()) + initAssignment(other.derived()); + // there is no special optimization + return Base::operator=(other.derived()); + } +} + +template +EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertUncompressed(Index row, Index col) +{ + eigen_assert(!isCompressed()); + + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? col : row; + + std::ptrdiff_t room = m_outerIndex[outer+1] - m_outerIndex[outer]; + std::ptrdiff_t innerNNZ = m_innerNonZeros[outer]; + if(innerNNZ>=room) + { + // this inner vector is full, we need to reallocate the whole buffer :( + reserve(SingletonVector(outer,std::max(2,innerNNZ))); + } + + Index startId = m_outerIndex[outer]; + Index p = startId + m_innerNonZeros[outer]; + while ( (p > startId) && (m_data.index(p-1) > inner) ) + { + m_data.index(p) = m_data.index(p-1); + m_data.value(p) = m_data.value(p-1); + --p; + } + eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exist, you must call coeffRef to this end"); + + m_innerNonZeros[outer]++; + + m_data.index(p) = inner; + return (m_data.value(p) = 0); +} + +template +EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertCompressed(Index row, Index col) +{ + eigen_assert(isCompressed()); + + const Index outer = IsRowMajor ? row : col; + const Index inner = IsRowMajor ? 
col : row; + + Index previousOuter = outer; + if (m_outerIndex[outer+1]==0) + { + // we start a new inner vector + while (previousOuter>=0 && m_outerIndex[previousOuter]==0) + { + m_outerIndex[previousOuter] = static_cast(m_data.size()); + --previousOuter; + } + m_outerIndex[outer+1] = m_outerIndex[outer]; + } + + // here we have to handle the tricky case where the outerIndex array + // starts with: [ 0 0 0 0 0 1 ...] and we are inserted in, e.g., + // the 2nd inner vector... + bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0)) + && (size_t(m_outerIndex[outer+1]) == m_data.size()); + + size_t startId = m_outerIndex[outer]; + // FIXME let's make sure sizeof(long int) == sizeof(size_t) + size_t p = m_outerIndex[outer+1]; + ++m_outerIndex[outer+1]; + + float reallocRatio = 1; + if (m_data.allocatedSize()<=m_data.size()) + { + // if there is no preallocated memory, let's reserve a minimum of 32 elements + if (m_data.size()==0) + { + m_data.reserve(32); + } + else + { + // we need to reallocate the data, to reduce multiple reallocations + // we use a smart resize algorithm based on the current filling ratio + // in addition, we use float to avoid integers overflows + float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1); + reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size()); + // furthermore we bound the realloc ratio to: + // 1) reduce multiple minor realloc when the matrix is almost filled + // 2) avoid to allocate too much memory when the matrix is almost empty + reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f); + } + } + m_data.resize(m_data.size()+1,reallocRatio); + + if (!isLastVec) + { + if (previousOuter==-1) + { + // oops wrong guess. + // let's correct the outer offsets + for (Index k=0; k<=(outer+1); ++k) + m_outerIndex[k] = 0; + Index k=outer+1; + while(m_outerIndex[k]==0) + m_outerIndex[k++] = 1; + while (k<=m_outerSize && m_outerIndex[k]!=0) + m_outerIndex[k++]++; + p = 0; + --k; + k = m_outerIndex[k]-1; + while (k>0) + { + m_data.index(k) = m_data.index(k-1); + m_data.value(k) = m_data.value(k-1); + k--; + } + } + else + { + // we are not inserting into the last inner vec + // update outer indices: + Index j = outer+2; + while (j<=m_outerSize && m_outerIndex[j]!=0) + m_outerIndex[j++]++; + --j; + // shift data of last vecs: + Index k = m_outerIndex[j]-1; + while (k>=Index(p)) + { + m_data.index(k) = m_data.index(k-1); + m_data.value(k) = m_data.value(k-1); + k--; + } + } + } + + while ( (p > startId) && (m_data.index(p-1) > inner) ) + { + m_data.index(p) = m_data.index(p-1); + m_data.value(p) = m_data.value(p-1); + --p; + } + + m_data.index(p) = inner; + return (m_data.value(p) = 0); +} + } // end namespace Eigen #endif // EIGEN_SPARSEMATRIX_H diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h index a9c8979cf..5bfd041ed 100644 --- a/Eigen/src/SparseCore/SparseVector.h +++ b/Eigen/src/SparseCore/SparseVector.h @@ -309,30 +309,7 @@ class SparseVector protected: template - EIGEN_DONT_INLINE SparseVector& assign(const SparseMatrixBase& _other) - { - const OtherDerived& other(_other.derived()); - const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); - if(needToTranspose) - { - Index size = other.size(); - Index nnz = other.nonZeros(); - resize(size); - reserve(nnz); - for(Index i=0; i& _other); Storage m_data; Index m_size; @@ -402,6 +379,33 @@ class SparseVector::ReverseInnerIterator const Index m_start; }; +template +template +EIGEN_DONT_INLINE 
SparseVector<Scalar,_Options,_Index>& SparseVector<Scalar,_Options,_Index>::assign(const SparseMatrixBase<OtherDerived>& _other)
+{
+  const OtherDerived& other(_other.derived());
+  const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+  if(needToTranspose)
+  {
+    Index size = other.size();
+    Index nnz = other.nonZeros();
+    resize(size);
+    reserve(nnz);
+    for(Index i=0; i<size; ++i)
+    {
+      typename OtherDerived::InnerIterator it(other, i);
+      if(it)
+        insert(i) = it.value();
+    }
+    return *this;
+  }
+  else
+  {
+    // there is no special optimization
+    return Base::operator=(other.derived());
+  }
+}
+
 } // end namespace Eigen

 #endif // EIGEN_SPARSEVECTOR_H
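
Note on the pattern applied across all of the files above: a member function defined inside its class body is implicitly inline, so marking such a definition with EIGEN_DONT_INLINE (which expands to a noinline attribute, e.g. __attribute__((noinline)) on GCC/Clang or __declspec(noinline) on MSVC) requests both behaviors at once, which ICC reports as "routine is both inline and noinline". Splitting each routine into an in-class declaration plus an out-of-class definition removes the implicit inline, so the noinline request no longer conflicts; an attribute on a mere in-class declaration is harmless, because only in-class definitions are implicitly inline. A minimal before/after sketch with hypothetical names, not Eigen's actual kernels:

    // Before: the in-class definition is implicitly inline, and the
    // noinline attribute then contradicts it (the warning above).
    template<typename Scalar>
    struct pack_kernel_before {
      __attribute__((noinline)) void operator()(Scalar* dst, const Scalar* src, int n)
      {
        for (int i = 0; i < n; ++i) dst[i] = src[i];
      }
    };

    // After: only declare the routine in the class body...
    template<typename Scalar>
    struct pack_kernel_after {
      void operator()(Scalar* dst, const Scalar* src, int n);
    };

    // ...and attach the noinline request to the out-of-class definition,
    // which is not implicitly inline. Since this is a template, the
    // definition can stay in the same header without ODR problems.
    template<typename Scalar>
    __attribute__((noinline)) void pack_kernel_after<Scalar>::operator()(Scalar* dst, const Scalar* src, int n)
    {
      for (int i = 0; i < n; ++i) dst[i] = src[i];
    }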
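
GeneralProduct.h additionally replaces the two explicit specializations of the outer_product_selector class template, whose static run members were again in-class (hence implicitly inline) definitions, with two overloads of a free function selected by a true_type/false_type tag. A compilable sketch of that tag-dispatch idiom, using std:: type traits where the patch uses Eigen's internal C++03 equivalents (internal::true_type, internal::conditional):

    #include <cstdio>
    #include <type_traits>

    // Toy stand-ins for a destination expression; IsRowMajor mirrors what
    // the patch's IsRowMajor<T> helper derives from T::Flags & RowMajorBit.
    struct ColMajorDest { static const bool IsRowMajor = false; };
    struct RowMajorDest { static const bool IsRowMajor = true;  };

    // One overload per storage order instead of one class specialization each.
    template<typename Dest>
    void outer_product_run(Dest&, std::false_type) { std::puts("column-major loop"); }

    template<typename Dest>
    void outer_product_run(Dest&, std::true_type)  { std::puts("row-major loop"); }

    template<typename Dest>
    void outer_product(Dest& dest)
    {
      // The empty tag object resolves the overload at compile time.
      outer_product_run(dest, std::integral_constant<bool, Dest::IsRowMajor>());
    }

    int main()
    {
      ColMajorDest c; RowMajorDest r;
      outer_product(c);  // prints "column-major loop"
      outer_product(r);  // prints "row-major loop"
      return 0;
    }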