diff --git a/Eigen/SparseCore b/Eigen/SparseCore index b2db46ba8..352f1ecec 100644 --- a/Eigen/SparseCore +++ b/Eigen/SparseCore @@ -17,6 +17,7 @@ #include #include #include +#include /** * \defgroup SparseCore_Module SparseCore module diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h index 733b1aa73..d89d99ea5 100644 --- a/Eigen/src/SparseCore/CompressedStorage.h +++ b/Eigen/src/SparseCore/CompressedStorage.h @@ -132,15 +132,7 @@ class CompressedStorage /** \returns the largest \c k in [start,end) such that for all \c j in [start,k) index[\c j]\<\a key */ inline Index searchLowerIndex(Index start, Index end, Index key) const { - while(end>start) - { - Index mid = (end+start)>>1; - if (m_indices[mid](std::distance(m_indices, std::lower_bound(m_indices + start, m_indices + end, key))); } /** \returns the stored value at index \a key @@ -198,19 +190,12 @@ class CompressedStorage return m_values[id]; } - void moveChunk(Index from, Index to, Index chunkSize) + inline void moveChunk(Index from, Index to, Index chunkSize) { - eigen_internal_assert(to+chunkSize <= m_size); - if(to>from && from+chunkSize>to) - { - // move backward - internal::smart_memmove(m_values+from, m_values+from+chunkSize, m_values+to); - internal::smart_memmove(m_indices+from, m_indices+from+chunkSize, m_indices+to); - } - else - { - internal::smart_copy(m_values+from, m_values+from+chunkSize, m_values+to); - internal::smart_copy(m_indices+from, m_indices+from+chunkSize, m_indices+to); + eigen_internal_assert(chunkSize >= 0 && to+chunkSize <= m_size); + if (chunkSize > 0) { + internal::smart_memmove(m_values + from, m_values + from + chunkSize, m_values + to); + internal::smart_memmove(m_indices + from, m_indices + from + chunkSize, m_indices + to); } } diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 68068124f..4dea78829 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -196,7 +196,7 @@ class SparseMatrix const Index outer = IsRowMajor ? row : col; const Index inner = IsRowMajor ? col : row; Index end = m_innerNonZeros ? m_outerIndex[outer] + m_innerNonZeros[outer] : m_outerIndex[outer+1]; - return m_data.atInRange(m_outerIndex[outer], end, StorageIndex(inner)); + return m_data.atInRange(m_outerIndex[outer], end, inner); } /** \returns a non-const reference to the value of the matrix at position \a i, \a j @@ -210,20 +210,17 @@ class SparseMatrix inline Scalar& coeffRef(Index row, Index col) { eigen_assert(row>=0 && row=0 && col=start && "you probably called coeffRef on a non finalized matrix"); - if(end<=start) - return insert(row,col); - const Index p = m_data.searchLowerIndex(start,end-1,StorageIndex(inner)); - if((p= start && "you probably called coeffRef on a non finalized matrix"); + if (end <= start) return insertAtByOuterInner(outer, inner, start); + Index dst = m_data.searchLowerIndex(start, end, inner); + if ((dst < end) && (m_data.index(dst) == inner)) + return m_data.value(dst); else - return insert(row,col); + return insertAtByOuterInner(outer, inner, dst); } /** \returns a reference to a novel non zero coefficient with coordinates \a row x \a col. @@ -241,7 +238,7 @@ class SparseMatrix * if the elements of each inner vector are inserted in increasing inner index order, and in O(nnz_j) for a random insertion. 
* */ - Scalar& insert(Index row, Index col); + inline Scalar& insert(Index row, Index col); public: @@ -301,6 +298,11 @@ class SparseMatrix if(isCompressed()) { Index totalReserveSize = 0; + for (Index j = 0; j < m_outerSize; ++j) totalReserveSize += reserveSizes[j]; + + // if reserveSizes is empty, don't do anything! + if (totalReserveSize == 0) return; + // turn the matrix into non-compressed mode m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize); @@ -312,18 +314,18 @@ class SparseMatrix { newOuterIndex[j] = count; count += reserveSizes[j] + (m_outerIndex[j+1]-m_outerIndex[j]); - totalReserveSize += reserveSizes[j]; } + m_data.reserve(totalReserveSize); StorageIndex previousOuterIndex = m_outerIndex[m_outerSize]; for(Index j=m_outerSize-1; j>=0; --j) { StorageIndex innerNNZ = previousOuterIndex - m_outerIndex[j]; - for(Index i=innerNNZ-1; i>=0; --i) - { - m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i); - m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i); - } + StorageIndex begin = m_outerIndex[j]; + StorageIndex end = begin + innerNNZ; + StorageIndex target = newOuterIndex[j]; + internal::smart_memmove(innerIndexPtr() + begin, innerIndexPtr() + end, innerIndexPtr() + target); + internal::smart_memmove(valuePtr() + begin, valuePtr() + end, valuePtr() + target); previousOuterIndex = m_outerIndex[j]; m_outerIndex[j] = newOuterIndex[j]; m_innerNonZeros[j] = innerNNZ; @@ -350,15 +352,15 @@ class SparseMatrix m_data.resize(count); for(Index j=m_outerSize-1; j>=0; --j) { - Index offset = newOuterIndex[j] - m_outerIndex[j]; + StorageIndex offset = newOuterIndex[j] - m_outerIndex[j]; if(offset>0) { StorageIndex innerNNZ = m_innerNonZeros[j]; - for(Index i=innerNNZ-1; i>=0; --i) - { - m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i); - m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i); - } + StorageIndex begin = m_outerIndex[j]; + StorageIndex end = begin + innerNNZ; + StorageIndex target = newOuterIndex[j]; + internal::smart_memmove(innerIndexPtr() + begin, innerIndexPtr() + end, innerIndexPtr() + target); + internal::smart_memmove(valuePtr() + begin, valuePtr() + end, valuePtr() + target); } } @@ -392,7 +394,7 @@ class SparseMatrix { eigen_assert(Index(m_outerIndex[outer+1]) == m_data.size() && "Invalid ordered insertion (invalid outer index)"); eigen_assert( (m_outerIndex[outer+1]-m_outerIndex[outer]==0 || m_data.index(m_data.size()-1) void setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func); - void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } + template + void collapseDuplicates(DenseBase& wi, DupFunctor dup_func = DupFunctor()); - template - void collapseDuplicates(DupFunctor dup_func = DupFunctor()); + template + void setFromSortedTriplets(const InputIterators& begin, const InputIterators& end); + + template + void setFromSortedTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func); //--- @@ -464,27 +470,23 @@ class SparseMatrix */ void makeCompressed() { - if(isCompressed()) - return; + if (isCompressed()) return; eigen_internal_assert(m_outerIndex!=0 && m_outerSize>0); - Index oldStart = m_outerIndex[1]; + StorageIndex start = m_outerIndex[1]; m_outerIndex[1] = m_innerNonZeros[0]; for(Index j=1; j0) + StorageIndex end = start + m_innerNonZeros[j]; + StorageIndex target = m_outerIndex[j]; + if (start != target) { - for(Index k=0; k(m_innerNonZeros, m_outerSize); m_innerNonZeros = 0; @@ -495,13 +497,12 @@ 
class SparseMatrix /** Turns the matrix into the uncompressed mode */ void uncompress() { - if(m_innerNonZeros != 0) - return; + if(m_innerNonZeros != 0) return; m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize); - for (Index i = 0; i < m_outerSize; i++) - { - m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; - } + typename IndexVector::AlignedMapType innerNonZeroMap(m_innerNonZeros, m_outerSize); + typename IndexVector::ConstAlignedMapType outerIndexMap(m_outerIndex, m_outerSize); + typename IndexVector::ConstMapType nextOuterIndexMap(m_outerIndex + 1, m_outerSize); + innerNonZeroMap = nextOuterIndexMap - outerIndexMap; } /** Suppresses all nonzeros which are \b much \b smaller \b than \a reference under the tolerance \a epsilon */ @@ -520,27 +521,32 @@ class SparseMatrix template void prune(const KeepFunc& keep = KeepFunc()) { - // TODO optimize the uncompressed mode to avoid moving and allocating the data twice - makeCompressed(); - StorageIndex k = 0; for(Index j=0; jrows() == rows && this->cols() == cols) return; - + void conservativeResize(Index rows, Index cols) { + // If one dimension is null, then there is nothing to be preserved - if(rows==0 || cols==0) return resize(rows,cols); + if (rows == 0 || cols == 0) return resize(rows, cols); - Index innerChange = IsRowMajor ? cols - this->cols() : rows - this->rows(); - Index outerChange = IsRowMajor ? rows - this->rows() : cols - this->cols(); - StorageIndex newInnerSize = convert_index(IsRowMajor ? cols : rows); + Index newOuterSize = IsRowMajor ? rows : cols; + Index newInnerSize = IsRowMajor ? cols : rows; - // Deals with inner non zeros - if (m_innerNonZeros) - { - // Resize m_innerNonZeros - m_innerNonZeros = internal::conditional_aligned_realloc_new_auto( - m_innerNonZeros, m_outerSize + outerChange, m_outerSize); - - for(Index i=m_outerSize; i(m_outerSize + outerChange); - for(Index i = 0; i < m_outerSize + (std::min)(outerChange, Index(0)); i++) - m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; - for(Index i = m_outerSize; i < m_outerSize + outerChange; i++) - m_innerNonZeros[i] = 0; - } - - // Change the m_innerNonZeros in case of a decrease of inner size - if (m_innerNonZeros && innerChange < 0) - { - for(Index i = 0; i < m_outerSize + (std::min)(outerChange, Index(0)); i++) - { - StorageIndex &n = m_innerNonZeros[i]; - StorageIndex start = m_outerIndex[i]; - while (n > 0 && m_data.index(start+n-1) >= newInnerSize) --n; + Index innerChange = newInnerSize - innerSize(); + Index outerChange = newOuterSize - outerSize(); + + if (outerChange != 0) { + m_outerIndex = internal::conditional_aligned_realloc_new_auto( + outerIndexPtr(), newOuterSize + 1, outerSize() + 1); + + if (!isCompressed()) + m_innerNonZeros = internal::conditional_aligned_realloc_new_auto( + innerNonZeroPtr(), newOuterSize, outerSize()); + + if (outerChange > 0) { + StorageIndex lastIdx = outerSize() == 0 ? StorageIndex(0) : outerIndexPtr()[outerSize()]; + std::fill_n(outerIndexPtr() + outerSize(), outerChange + 1, lastIdx); + + if (!isCompressed()) std::fill_n(innerNonZeroPtr() + outerSize(), outerChange, StorageIndex(0)); + } + } + m_outerSize = newOuterSize; + + if (innerChange < 0) { + for (Index j = 0; j < outerSize(); j++) { + Index start = outerIndexPtr()[j]; + Index end = isCompressed() ? 
outerIndexPtr()[j + 1] : start + innerNonZeroPtr()[j]; + Index lb = data().searchLowerIndex(start, end, newInnerSize); + if (lb != end) { + uncompress(); + innerNonZeroPtr()[j] = StorageIndex(lb - start); + } } } - m_innerSize = newInnerSize; - // Re-allocate outer index structure if necessary - if (outerChange == 0) - return; - - m_outerIndex = internal::conditional_aligned_realloc_new_auto( - m_outerIndex, m_outerSize + outerChange + 1, m_outerSize + 1); - if (outerChange > 0) - { - StorageIndex lastIdx = m_outerSize == 0 ? 0 : m_outerIndex[m_outerSize]; - for(Index i=m_outerSize; im_data.resize(rows()); - Eigen::Map(this->m_data.indexPtr(), rows()).setLinSpaced(0, StorageIndex(rows()-1)); - Eigen::Map(this->m_data.valuePtr(), rows()).setOnes(); - Eigen::Map(this->m_outerIndex, rows()+1).setLinSpaced(0, StorageIndex(rows())); - internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); - m_innerNonZeros = 0; + if (m_innerNonZeros) { + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); + m_innerNonZeros = 0; + } + m_data.resize(rows()); + // is it necessary to squeeze? + m_data.squeeze(); + typename IndexVector::AlignedMapType outerIndexMap(outerIndexPtr(), rows() + 1); + typename IndexVector::AlignedMapType innerIndexMap(innerIndexPtr(), rows()); + typename ScalarVector::AlignedMapType valueMap(valuePtr(), rows()); + outerIndexMap.setEqualSpaced(StorageIndex(0), StorageIndex(1)); + innerIndexMap.setEqualSpaced(StorageIndex(0), StorageIndex(1)); + valueMap.setOnes(); } inline SparseMatrix& operator=(const SparseMatrix& other) @@ -865,7 +864,7 @@ protected: /** \internal * \sa insert(Index,Index) */ - EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col); + EIGEN_DEPRECATED EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col); /** \internal * A vector object that is equal to 0 everywhere but v at the position i */ @@ -884,7 +883,7 @@ protected: /** \internal * \sa insert(Index,Index) */ - EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col); + EIGEN_DEPRECATED EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col); public: /** \internal @@ -913,17 +912,18 @@ protected: * 1 - if *this is overwritten (Func==assign_op) or *this is empty, then we can work treat *this as a dense vector expression. * 2 - otherwise, for each diagonal coeff, * 2.a - if it already exists, then we update it, - * 2.b - otherwise, if *this is uncompressed and that the current inner-vector has empty room for at least 1 element, then we perform an in-place insertion. - * 2.c - otherwise, we'll have to reallocate and copy everything, so instead of doing so for each new element, it is recorded in a std::vector. - * 3 - at the end, if some entries failed to be inserted in-place, then we alloc a new buffer, copy each chunk at the right position, and insert the new elements. - * - * TODO: some piece of code could be isolated and reused for a general in-place update strategy. - * TODO: if we start to defer the insertion of some elements (i.e., case 2.c executed once), - * then it *might* be better to disable case 2.b since they will have to be copied anyway. + * 2.b - if the correct position is at the end of the vector, and there is capacity, push to back + * 2.b - otherwise, the insertion requires a data move, record insertion locations and handle in a second pass + * 3 - at the end, if some entries failed to be updated in-place, then we alloc a new buffer, copy each chunk at the right position, and insert the new elements. 
*/ template void assignDiagonal(const DiagXpr diagXpr, const Func& assignFunc) { + + constexpr StorageIndex kEmptyIndexVal(-1); + typedef typename IndexVector::AlignedMapType IndexMap; + typedef typename ScalarVector::AlignedMapType ValueMap; + Index n = diagXpr.size(); const bool overwrite = internal::is_same >::value; @@ -933,82 +933,106 @@ protected: this->resize(n, n); } - if(m_data.size()==0 || overwrite) + if(data().size()==0 || overwrite) { - typedef Array ArrayXI; - this->makeCompressed(); - this->resizeNonZeros(n); - Eigen::Map(this->innerIndexPtr(), n).setLinSpaced(0,StorageIndex(n)-1); - Eigen::Map(this->outerIndexPtr(), n+1).setLinSpaced(0,StorageIndex(n)); - Eigen::Map > values = this->coeffs(); - values.setZero(); - internal::call_assignment_no_alias(values, diagXpr, assignFunc); + if (!isCompressed()) { + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); + m_innerNonZeros = 0; + } + resizeNonZeros(n); + IndexMap outerIndexMap(outerIndexPtr(), n + 1); + IndexMap innerIndexMap(innerIndexPtr(), n); + ValueMap valueMap(valuePtr(), n); + outerIndexMap.setEqualSpaced(StorageIndex(0), StorageIndex(1)); + innerIndexMap.setEqualSpaced(StorageIndex(0), StorageIndex(1)); + valueMap.setZero(); + internal::call_assignment_no_alias(valueMap, diagXpr, assignFunc); } else { - bool isComp = isCompressed(); - internal::evaluator diaEval(diagXpr); - std::vector newEntries; + internal::evaluator diaEval(diagXpr); - // 1 - try in-place update and record insertion failures - for(Index i = 0; ilower_bound(i,i); - Index p = lb.value; - if(lb.found) - { - // the coeff already exists - assignFunc.assignCoeff(m_data.value(p), diaEval.coeff(i)); + ei_declare_aligned_stack_constructed_variable(StorageIndex, tmp, n, 0); + typename IndexVector::AlignedMapType insertionLocations(tmp, n); + insertionLocations.setConstant(kEmptyIndexVal); + + Index deferredInsertions = 0; + Index shift = 0; + + for (Index j = 0; j < n; j++) { + Index begin = outerIndexPtr()[j]; + Index end = isCompressed() ? outerIndexPtr()[j + 1] : begin + innerNonZeroPtr()[j]; + Index capacity = outerIndexPtr()[j + 1] - end; + Index dst = data().searchLowerIndex(begin, end, j); + // the entry exists: update it now + if (dst != end && data().index(dst) == j) assignFunc.assignCoeff(data().value(dst), diaEval.coeff(j)); + // the entry belongs at the back of the vector: push to back + else if (dst == end && capacity > 0) + assignFunc.assignCoeff(insertBackUncompressed(j, j), diaEval.coeff(j)); + // the insertion requires a data move, record insertion location and handle in second pass + else { + insertionLocations.coeffRef(j) = dst; + deferredInsertions++; + // if there is no capacity, all vectors to the right of this are shifted + if (capacity == 0) shift++; + } } - else if((!isComp) && m_innerNonZeros[i] < (m_outerIndex[i+1]-m_outerIndex[i])) - { - // non compressed mode with local room for inserting one element - m_data.moveChunk(p, p+1, m_outerIndex[i]+m_innerNonZeros[i]-p); - m_innerNonZeros[i]++; - m_data.value(p) = Scalar(0); - m_data.index(p) = StorageIndex(i); - assignFunc.assignCoeff(m_data.value(p), diaEval.coeff(i)); - } - else - { - // defer insertion - newEntries.push_back(IndexPosPair(i,p)); - } - } - // 2 - insert deferred entries - Index n_entries = Index(newEntries.size()); - if(n_entries>0) - { - Storage newData(m_data.size()+n_entries); - Index prev_p = 0; - Index prev_i = 0; - for(Index k=0; k 0) { + + data().resize(data().size() + shift); + Index copyEnd = isCompressed() ? 
outerIndexPtr()[outerSize()] + : outerIndexPtr()[outerSize() - 1] + innerNonZeroPtr()[outerSize() - 1]; + for (Index j = outerSize() - 1; deferredInsertions > 0; j--) { + Index begin = outerIndexPtr()[j]; + Index end = isCompressed() ? outerIndexPtr()[j + 1] : begin + innerNonZeroPtr()[j]; + Index capacity = outerIndexPtr()[j + 1] - end; + + bool doInsertion = insertionLocations(j) >= 0; + bool breakUpCopy = doInsertion && (capacity > 0); + // break up copy for sorted insertion into inactive nonzeros + // optionally, add another criterion, i.e. 'breakUpCopy || (capacity > threshold)' + // where `threshold >= 0` to skip inactive nonzeros in each vector + // this reduces the total number of copied elements, but requires more moveChunk calls + if (breakUpCopy) { + Index copyBegin = outerIndexPtr()[j + 1]; + Index to = copyBegin + shift; + Index chunkSize = copyEnd - copyBegin; + if (chunkSize > 0) data().moveChunk(copyBegin, to, chunkSize); + copyEnd = end; + } + + outerIndexPtr()[j + 1] += shift; + + if (doInsertion) { + // if there is capacity, shift into the inactive nonzeros + if (capacity > 0) shift++; + Index copyBegin = insertionLocations(j); + Index to = copyBegin + shift; + Index chunkSize = copyEnd - copyBegin; + if (chunkSize > 0) data().moveChunk(copyBegin, to, chunkSize); + Index dst = to - 1; + data().index(dst) = StorageIndex(j); + assignFunc.assignCoeff(data().value(dst) = Scalar(0), diaEval.coeff(j)); + if (!isCompressed()) innerNonZeroPtr()[j]++; + shift--; + deferredInsertions--; + copyEnd = copyBegin; + } + } + } + eigen_assert((shift == 0) && (deferredInsertions == 0)); } } + /* Provides a consistent reserve and reallocation strategy for insertCompressed and insertUncompressed + If there is insufficient space for one insertion, the size of the memory buffer is doubled */ + inline void checkAllocatedSpaceAndMaybeExpand(); + /* These functions are used to avoid a redundant binary search operation in functions such as coeffRef() and assume `dst` is the appropriate sorted insertion point */ + EIGEN_STRONG_INLINE Scalar& insertAtByOuterInner(Index outer, Index inner, Index dst); + Scalar& insertCompressedAtByOuterInner(Index outer, Index inner, Index dst); + Scalar& insertUncompressedAtByOuterInner(Index outer, Index inner, Index dst); + private: EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE) EIGEN_STATIC_ASSERT((Options&(ColMajor|RowMajor))==Options,INVALID_MATRIX_TEMPLATE_PARAMETERS) @@ -1026,36 +1050,106 @@ private: namespace internal { -template -void set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, DupFunctor dup_func) -{ - enum { IsRowMajor = SparseMatrixType::IsRowMajor }; - typedef typename SparseMatrixType::Scalar Scalar; +// Creates a compressed sparse matrix from a range of unsorted triplets +// Requires temporary storage to handle duplicate entries +template +void set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, + DupFunctor dup_func) { + + constexpr bool IsRowMajor = SparseMatrixType::IsRowMajor; typedef typename SparseMatrixType::StorageIndex StorageIndex; - SparseMatrix trMat(mat.rows(),mat.cols()); + typedef typename VectorX::AlignedMapType IndexMap; + if (begin == end) return; - if(begin!=end) - { - // pass 1: count the nnz per inner-vector - typename SparseMatrixType::IndexVector wi(trMat.outerSize()); - wi.setZero(); - for(InputIterator it(begin); it!=end; ++it) - { - eigen_assert(it->row()>=0 && it->row()col()>=0 && it->col()col() : 
it->row())++; - } - - // pass 2: insert all the elements into trMat - trMat.reserve(wi); - for(InputIterator it(begin); it!=end; ++it) - trMat.insertBackUncompressed(it->row(),it->col()) = it->value(); - - // pass 3: - trMat.collapseDuplicates(dup_func); + // free innerNonZeroPtr (if present) and zero outerIndexPtr + mat.resize(mat.rows(), mat.cols()); + // allocate temporary storage for nonzero insertion (outer size) and duplicate removal (inner size) + ei_declare_aligned_stack_constructed_variable(StorageIndex, tmp, numext::maxi(mat.innerSize(), mat.outerSize()), 0); + // scan triplets to determine allocation size before constructing matrix + IndexMap outerIndexMap(mat.outerIndexPtr(), mat.outerSize() + 1); + for (InputIterator it(begin); it != end; ++it) { + eigen_assert(it->row() >= 0 && it->row() < mat.rows() && it->col() >= 0 && it->col() < mat.cols()); + StorageIndex j = IsRowMajor ? it->row() : it->col(); + outerIndexMap.coeffRef(j + 1)++; } - // pass 4: transposed copy -> implicit sorting - mat = trMat; + // finalize outer indices and allocate memory + std::partial_sum(outerIndexMap.begin(), outerIndexMap.end(), outerIndexMap.begin()); + Index nonZeros = mat.outerIndexPtr()[mat.outerSize()]; + mat.resizeNonZeros(nonZeros); + + // use tmp to track nonzero insertions + IndexMap back(tmp, mat.outerSize()); + back = outerIndexMap.head(mat.outerSize()); + + // push triplets to back of each inner vector + for (InputIterator it(begin); it != end; ++it) { + StorageIndex j = IsRowMajor ? it->row() : it->col(); + StorageIndex i = IsRowMajor ? it->col() : it->row(); + mat.data().index(back.coeff(j)) = i; + mat.data().value(back.coeff(j)) = it->value(); + back.coeffRef(j)++; + } + + // use tmp to collapse duplicates + IndexMap wi(tmp, mat.innerSize()); + mat.collapseDuplicates(wi, dup_func); + mat.sortInnerIndices(); +} + +// Creates a compressed sparse matrix from a sorted range of triplets +template +void set_from_triplets_sorted(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, + DupFunctor dup_func) { + constexpr bool IsRowMajor = SparseMatrixType::IsRowMajor; + typedef typename SparseMatrixType::StorageIndex StorageIndex; + typedef typename VectorX::AlignedMapType IndexMap; + if (begin == end) return; + + constexpr StorageIndex kEmptyIndexValue(-1); + // deallocate inner nonzeros if present and zero outerIndexPtr + mat.resize(mat.rows(), mat.cols()); + // use outer indices to count non zero entries (excluding duplicate entries) + StorageIndex previous_j = kEmptyIndexValue; + StorageIndex previous_i = kEmptyIndexValue; + // scan triplets to determine allocation size before constructing matrix + IndexMap outerIndexMap(mat.outerIndexPtr(), mat.outerSize() + 1); + for (InputIterator it(begin); it != end; ++it) { + eigen_assert(it->row() >= 0 && it->row() < mat.rows() && it->col() >= 0 && it->col() < mat.cols()); + StorageIndex j = IsRowMajor ? it->row() : it->col(); + StorageIndex i = IsRowMajor ? 
it->col() : it->row(); + eigen_assert(j > previous_j || (j == previous_j && i >= previous_i)); + // identify duplicates by examining previous location + bool duplicate = (previous_j == j) && (previous_i == i); + if (!duplicate) outerIndexMap.coeffRef(j + 1)++; + previous_j = j; + previous_i = i; + } + + // finalize outer indices and allocate memory + std::partial_sum(outerIndexMap.begin(), outerIndexMap.end(), outerIndexMap.begin()); + Index nonZeros = mat.outerIndexPtr()[mat.outerSize()]; + mat.resizeNonZeros(nonZeros); + + previous_i = kEmptyIndexValue; + previous_j = kEmptyIndexValue; + Index back = 0; + for (InputIterator it(begin); it != end; ++it) { + StorageIndex j = IsRowMajor ? it->row() : it->col(); + StorageIndex i = IsRowMajor ? it->col() : it->row(); + bool duplicate = (previous_j == j) && (previous_i == i); + if (duplicate) { + mat.data().value(back - 1) = dup_func(mat.data().value(back - 1), it->value()); + } else { + // push triplets to back + mat.data().index(back) = i; + mat.data().value(back) = it->value(); + previous_j = j; + previous_i = i; + back++; + } + } + // matrix is finalized } } @@ -1105,47 +1199,71 @@ void SparseMatrix::setFromTriplets(const InputIte internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); } +/** The same as setFromTriplets but triplets are assumed to be pre-sorted. This is faster and requires less temporary storage. + * Two triplets `a` and `b` are appropriately ordered if: + * \code + * ColMajor: ((a.col() != b.col()) ? (a.col() < b.col()) : (a.row() < b.row()) + * RowMajor: ((a.row() != b.row()) ? (a.row() < b.row()) : (a.col() < b.col()) + * \endcode + */ +template +template +void SparseMatrix::setFromSortedTriplets(const InputIterators& begin, const InputIterators& end) +{ + internal::set_from_triplets_sorted >(begin, end, *this, internal::scalar_sum_op()); +} + /** The same as setFromTriplets but when duplicates are met the functor \a dup_func is applied: * \code * value = dup_func(OldValue, NewValue) - * \endcode + * \endcode * Here is a C++11 example keeping the latest entry only: * \code * mat.setFromTriplets(triplets.begin(), triplets.end(), [] (const Scalar&,const Scalar &b) { return b; }); * \endcode */ template -template -void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) +template +void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) { - internal::set_from_triplets, DupFunctor>(begin, end, *this, dup_func); + internal::set_from_triplets, DupFunctor>(begin, end, *this, dup_func); +} + +/** The same as setFromSortedTriplets but when duplicates are met the functor \a dup_func is applied: + * \code + * value = dup_func(OldValue, NewValue) + * \endcode + * Here is a C++11 example keeping the latest entry only: + * \code + * mat.setFromTriplets(triplets.begin(), triplets.end(), [] (const Scalar&,const Scalar &b) { return b; }); + * \endcode + */ +template +template +void SparseMatrix::setFromSortedTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) +{ + internal::set_from_triplets_sorted, DupFunctor>(begin, end, *this, dup_func); } /** \internal */ template -template -void SparseMatrix::collapseDuplicates(DupFunctor dup_func) +template +void SparseMatrix::collapseDuplicates(DenseBase& wi, DupFunctor dup_func) { - eigen_assert(!isCompressed()); - // TODO, in practice we should be able to use m_innerNonZeros for that task - IndexVector wi(innerSize()); - wi.fill(-1); 
+ eigen_assert(wi.size() >= m_innerSize); + constexpr StorageIndex kEmptyIndexValue(-1); + wi.setConstant(kEmptyIndexValue); StorageIndex count = 0; // for each inner-vector, wi[inner_index] will hold the position of first element into the index/value buffers - for(Index j=0; j=start) - { + for (Index j = 0; j < m_outerSize; ++j) { + StorageIndex start = count; + StorageIndex oldEnd = isCompressed() ? m_outerIndex[j + 1] : m_outerIndex[j] + m_innerNonZeros[j]; + for (StorageIndex k = m_outerIndex[j]; k < oldEnd; ++k) { + StorageIndex i = m_data.index(k); + if (wi(i) >= start) { // we already meet this entry => accumulate it m_data.value(wi(i)) = dup_func(m_data.value(wi(i)), m_data.value(k)); - } - else - { + } else { m_data.value(count) = m_data.value(k); m_data.index(count) = m_data.index(k); wi(i) = count; @@ -1155,13 +1273,17 @@ void SparseMatrix::collapseDuplicates(DupFunctor m_outerIndex[j] = start; } m_outerIndex[m_outerSize] = count; - // turn the matrix into compressed form - internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); - m_innerNonZeros = 0; + if (m_innerNonZeros) { + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); + m_innerNonZeros = 0; + } m_data.resize(m_outerIndex[m_outerSize]); } + + +/** \internal */ template template EIGEN_DONT_INLINE SparseMatrix& SparseMatrix::operator=(const SparseMatrixBase& other) @@ -1235,274 +1357,130 @@ EIGEN_DONT_INLINE SparseMatrix& SparseMatrix -typename SparseMatrix::Scalar& SparseMatrix::insert(Index row, Index col) -{ - eigen_assert(row>=0 && row=0 && col(m_outerSize); - - std::fill(m_innerNonZeros, m_innerNonZeros + m_outerSize, StorageIndex(0)); - - // pack all inner-vectors to the end of the pre-allocated space - // and allocate the entire free-space to the first inner-vector - StorageIndex end = convert_index(m_data.allocatedSize()); - for(Index j=1; j<=m_outerSize; ++j) - m_outerIndex[j] = end; - } - else - { - // turn the matrix into non-compressed mode - m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize); - for(Index j=0; j=0 && m_innerNonZeros[j]==0) - m_outerIndex[j--] = p; - - // push back the new element - ++m_innerNonZeros[outer]; - m_data.append(Scalar(0), inner); - - // check for reallocation - if(data_end != m_data.allocatedSize()) - { - // m_data has been reallocated - // -> move remaining inner-vectors back to the end of the free-space - // so that the entire free-space is allocated to the current inner-vector. - eigen_internal_assert(data_end < m_data.allocatedSize()); - StorageIndex new_end = convert_index(m_data.allocatedSize()); - for(Index k=outer+1; k<=m_outerSize; ++k) - if(m_outerIndex[k]==data_end) - m_outerIndex[k] = new_end; - } - return m_data.value(p); - } - - // Second case: the next inner-vector is packed to the end - // and the current inner-vector end match the used-space. - if(m_outerIndex[outer+1]==data_end && m_outerIndex[outer]+m_innerNonZeros[outer]==m_data.size()) - { - eigen_internal_assert(outer+1==m_outerSize || m_innerNonZeros[outer+1]==0); - - // add space for the new element - ++m_innerNonZeros[outer]; - m_data.resize(m_data.size()+1); - - // check for reallocation - if(data_end != m_data.allocatedSize()) - { - // m_data has been reallocated - // -> move remaining inner-vectors back to the end of the free-space - // so that the entire free-space is allocated to the current inner-vector. 
- eigen_internal_assert(data_end < m_data.allocatedSize()); - StorageIndex new_end = convert_index(m_data.allocatedSize()); - for(Index k=outer+1; k<=m_outerSize; ++k) - if(m_outerIndex[k]==data_end) - m_outerIndex[k] = new_end; - } - - // and insert it at the right position (sorted insertion) - Index startId = m_outerIndex[outer]; - Index p = m_outerIndex[outer]+m_innerNonZeros[outer]-1; - while ( (p > startId) && (m_data.index(p-1) > inner) ) - { - m_data.index(p) = m_data.index(p-1); - m_data.value(p) = m_data.value(p-1); - --p; - } - - m_data.index(p) = convert_index(inner); - return (m_data.value(p) = Scalar(0)); - } - - if(m_data.size() != m_data.allocatedSize()) - { - // make sure the matrix is compatible to random un-compressed insertion: - m_data.resize(m_data.allocatedSize()); - this->reserveInnerVectors(Array::Constant(m_outerSize, 2)); - } - - return insertUncompressed(row,col); +template +inline typename SparseMatrix::Scalar& SparseMatrix::insert( + Index row, Index col) { + Index outer = IsRowMajor ? row : col; + Index inner = IsRowMajor ? col : row; + Index start = outerIndexPtr()[outer]; + Index end = isCompressed() ? outerIndexPtr()[outer + 1] : start + innerNonZeroPtr()[outer]; + Index dst = data().searchLowerIndex(start, end, inner); + eigen_assert((dst == end || data().index(dst) != inner) && + "you cannot insert an element that already exists, you must call coeffRef to this end"); + return insertAtByOuterInner(outer, inner, dst); } - -template -EIGEN_DONT_INLINE typename SparseMatrix::Scalar& SparseMatrix::insertUncompressed(Index row, Index col) -{ + +template +EIGEN_STRONG_INLINE typename SparseMatrix::Scalar& +SparseMatrix::insertAtByOuterInner(Index outer, Index inner, Index dst) { + if (isCompressed()) + return insertCompressedAtByOuterInner(outer, inner, dst); + else + return insertUncompressedAtByOuterInner(outer, inner, dst); +} + +template +EIGEN_DEPRECATED EIGEN_DONT_INLINE typename SparseMatrix::Scalar& +SparseMatrix::insertUncompressed(Index row, Index col) { eigen_assert(!isCompressed()); - - const Index outer = IsRowMajor ? row : col; - const StorageIndex inner = convert_index(IsRowMajor ? col : row); - - Index room = m_outerIndex[outer+1] - m_outerIndex[outer]; - StorageIndex innerNNZ = m_innerNonZeros[outer]; - if(innerNNZ>=room) - { - // this inner vector is full, we need to reallocate the whole buffer :( - reserve(SingletonVector(outer,std::max(2,innerNNZ))); - } - - Index startId = m_outerIndex[outer]; - Index p = startId + m_innerNonZeros[outer]; - while ( (p > startId) && (m_data.index(p-1) > inner) ) - { - m_data.index(p) = m_data.index(p-1); - m_data.value(p) = m_data.value(p-1); - --p; - } - eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exists, you must call coeffRef to this end"); - - m_innerNonZeros[outer]++; - - m_data.index(p) = inner; - return (m_data.value(p) = Scalar(0)); + Index outer = IsRowMajor ? row : col; + Index inner = IsRowMajor ? 
col : row; + Index start = outerIndexPtr()[outer]; + Index end = start + innerNonZeroPtr()[outer]; + Index dst = data().searchLowerIndex(start, end, inner); + eigen_assert((dst == end || data().index(dst) != inner) && + "you cannot insert an element that already exists, you must call coeffRef to this end"); + return insertUncompressedAtByOuterInner(outer, inner, dst); } -template -EIGEN_DONT_INLINE typename SparseMatrix::Scalar& SparseMatrix::insertCompressed(Index row, Index col) -{ +template +EIGEN_DEPRECATED EIGEN_DONT_INLINE typename SparseMatrix::Scalar& +SparseMatrix::insertCompressed(Index row, Index col) { eigen_assert(isCompressed()); + Index outer = IsRowMajor ? row : col; + Index inner = IsRowMajor ? col : row; + Index start = outerIndexPtr()[outer]; + Index end = outerIndexPtr()[outer + 1]; + Index dst = data().searchLowerIndex(start, end, inner); + eigen_assert((dst == end || data().index(dst) != inner) && + "you cannot insert an element that already exists, you must call coeffRef to this end"); + return insertCompressedAtByOuterInner(outer, inner, dst); +} - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - Index previousOuter = outer; - if (m_outerIndex[outer+1]==0) - { - // we start a new inner vector - while (previousOuter>=0 && m_outerIndex[previousOuter]==0) - { - m_outerIndex[previousOuter] = convert_index(m_data.size()); - --previousOuter; - } - m_outerIndex[outer+1] = m_outerIndex[outer]; +template +EIGEN_STRONG_INLINE void SparseMatrix::checkAllocatedSpaceAndMaybeExpand() { + // if there is no capacity for a single insertion, double the capacity + if (data().allocatedSize() <= data().size()) { + // increase capacity by a minimum of 32 + Index minReserve = 32; + Index reserveSize = numext::maxi(minReserve, data().allocatedSize()); + data().reserve(reserveSize); } +} - // here we have to handle the tricky case where the outerIndex array - // starts with: [ 0 0 0 0 0 1 ...] and we are inserted in, e.g., - // the 2nd inner vector... 
- bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0)) - && (std::size_t(m_outerIndex[outer+1]) == m_data.size()); +template +typename SparseMatrix::Scalar& +SparseMatrix::insertCompressedAtByOuterInner(Index outer, Index inner, Index dst) { + eigen_assert(isCompressed()); + // compressed insertion always requires expanding the buffer + checkAllocatedSpaceAndMaybeExpand(); + data().resize(data().size() + 1); + Index chunkSize = outerIndexPtr()[outerSize()] - dst; + // shift the existing data to the right if necessary + if (chunkSize > 0) data().moveChunk(dst, dst + 1, chunkSize); + // update nonzero counts + for (; outer < outerSize(); outer++) outerIndexPtr()[outer + 1]++; + // initialize the coefficient + data().index(dst) = StorageIndex(inner); + // return a reference to the coefficient + return data().value(dst) = Scalar(0); +} - std::size_t startId = m_outerIndex[outer]; - // FIXME let's make sure sizeof(long int) == sizeof(std::size_t) - std::size_t p = m_outerIndex[outer+1]; - ++m_outerIndex[outer+1]; - - double reallocRatio = 1; - if (m_data.allocatedSize()<=m_data.size()) - { - // if there is no preallocated memory, let's reserve a minimum of 32 elements - if (m_data.size()==0) - { - m_data.reserve(32); - } - else - { - // we need to reallocate the data, to reduce multiple reallocations - // we use a smart resize algorithm based on the current filling ratio - // in addition, we use double to avoid integers overflows - double nnzEstimate = double(m_outerIndex[outer])*double(m_outerSize)/double(outer+1); - reallocRatio = (nnzEstimate-double(m_data.size()))/double(m_data.size()); - // furthermore we bound the realloc ratio to: - // 1) reduce multiple minor realloc when the matrix is almost filled - // 2) avoid to allocate too much memory when the matrix is almost empty - reallocRatio = (std::min)((std::max)(reallocRatio,1.5),8.); +template +typename SparseMatrix::Scalar& +SparseMatrix::insertUncompressedAtByOuterInner(Index outer, Index inner, Index dst) { + eigen_assert(!isCompressed()); + // find nearest outer vector to the right with capacity (if any) to minimize copy size + Index target = outer; + for (; target < outerSize(); target++) { + Index start = outerIndexPtr()[target]; + Index end = start + innerNonZeroPtr()[target]; + Index capacity = outerIndexPtr()[target + 1] - end; + if (capacity > 0) { + // `target` has room for interior insertion + Index chunkSize = end - dst; + // shift the existing data to the right if necessary + if(chunkSize > 0) data().moveChunk(dst, dst + 1, chunkSize); + break; } } - m_data.resize(m_data.size()+1,reallocRatio); - - if (!isLastVec) - { - if (previousOuter==-1) - { - // oops wrong guess. 
- // let's correct the outer offsets - for (Index k=0; k<=(outer+1); ++k) - m_outerIndex[k] = 0; - Index k=outer+1; - while(m_outerIndex[k]==0) - m_outerIndex[k++] = 1; - while (k<=m_outerSize && m_outerIndex[k]!=0) - m_outerIndex[k++]++; - p = 0; - --k; - k = m_outerIndex[k]-1; - while (k>0) - { - m_data.index(k) = m_data.index(k-1); - m_data.value(k) = m_data.value(k-1); - k--; - } - } - else - { - // we are not inserting into the last inner vec - // update outer indices: - Index j = outer+2; - while (j<=m_outerSize && m_outerIndex[j]!=0) - m_outerIndex[j++]++; - --j; - // shift data of last vecs: - Index k = m_outerIndex[j]-1; - while (k>=Index(p)) - { - m_data.index(k) = m_data.index(k-1); - m_data.value(k) = m_data.value(k-1); - k--; - } - } + if (target == outerSize()) { + // no room for interior insertion, must expand storage + checkAllocatedSpaceAndMaybeExpand(); + data().resize(data().size() + 1); + // shift the existing data to the right if necessary + Index chunkSize = outerIndexPtr()[outerSize()] - dst; + if (chunkSize > 0) data().moveChunk(dst, dst + 1, chunkSize); } - - while ( (p > startId) && (m_data.index(p-1) > inner) ) - { - m_data.index(p) = m_data.index(p-1); - m_data.value(p) = m_data.value(p-1); - --p; - } - - m_data.index(p) = inner; - return (m_data.value(p) = Scalar(0)); + // update nonzero counts + innerNonZeroPtr()[outer]++; + for (; outer < target; outer++) outerIndexPtr()[outer + 1]++; + // initialize the coefficient + data().index(dst) = StorageIndex(inner); + // return a reference to the coefficient + return data().value(dst) = Scalar(0); } namespace internal { -template -struct evaluator > - : evaluator > > -{ - typedef evaluator > > Base; - typedef SparseMatrix SparseMatrixType; - evaluator() : Base() {} - explicit evaluator(const SparseMatrixType &mat) : Base(mat) {} -}; + template + struct evaluator> + : evaluator>> { + typedef evaluator>> Base; + typedef SparseMatrix SparseMatrixType; + evaluator() : Base() {} + explicit evaluator(const SparseMatrixType& mat) : Base(mat) {} + }; } diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 52ff7b70d..6e2661e8d 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -477,14 +477,45 @@ template void sparse_basic(const SparseMatrixType& re refMat_prod(r,c) *= v; refMat_last(r,c) = v; } + SparseMatrixType m(rows,cols); m.setFromTriplets(triplets.begin(), triplets.end()); VERIFY_IS_APPROX(m, refMat_sum); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); m.setFromTriplets(triplets.begin(), triplets.end(), std::multiplies()); VERIFY_IS_APPROX(m, refMat_prod); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); m.setFromTriplets(triplets.begin(), triplets.end(), [] (Scalar,Scalar b) { return b; }); VERIFY_IS_APPROX(m, refMat_last); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); + + // test setFromSortedTriplets + + struct triplet_comp { + inline bool operator()(const TripletType& a, const TripletType& b) { + return SparseMatrixType::IsRowMajor ? ((a.row() != b.row()) ? (a.row() < b.row()) : (a.col() < b.col())) + : ((a.col() != b.col()) ? 
(a.col() < b.col()) : (a.row() < b.row())); + } + }; + + // stable_sort is only necessary when the reduction functor is dependent on the order of the triplets + // this is the case with refMat_last + // for most cases, std::sort is sufficient and preferred + std::stable_sort(triplets.begin(), triplets.end(), triplet_comp()); + + m.setZero(); + m.setFromSortedTriplets(triplets.begin(), triplets.end()); + VERIFY_IS_APPROX(m, refMat_sum); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); + + m.setFromSortedTriplets(triplets.begin(), triplets.end(), std::multiplies()); + VERIFY_IS_APPROX(m, refMat_prod); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); + + m.setFromSortedTriplets(triplets.begin(), triplets.end(), [](Scalar, Scalar b) { return b; }); + VERIFY_IS_APPROX(m, refMat_last); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); } // test Map
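The simplified CompressedStorage::searchLowerIndex above reduces to a single std::lower_bound call over the index array. A standalone sketch of the equivalent helper, using illustrative names and plain pointers rather than the patch's exact signature:

#include <algorithm>
#include <iterator>

// Returns the first position in [start, end) whose stored index is not smaller than key,
// mirroring the lower_bound-based rewrite of searchLowerIndex.
template <typename IndexT>
IndexT search_lower_index(const IndexT* indices, IndexT start, IndexT end, IndexT key) {
  return static_cast<IndexT>(
      std::distance(indices, std::lower_bound(indices + start, indices + end, key)));
}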
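A short usage sketch of the coeffRef()/insert() contract that the rewritten accessors rely on; the matrix size and values are arbitrary. insert() asserts that the coefficient does not exist yet, while coeffRef() falls back to an insertion through insertAtByOuterInner() when the binary search misses:

#include <Eigen/SparseCore>

int main() {
  Eigen::SparseMatrix<double> mat(4, 4);  // empty, compressed
  mat.coeffRef(1, 2) = 3.0;   // (1,2) is absent: coeffRef inserts it, then assigns
  mat.coeffRef(1, 2) += 1.0;  // (1,2) now exists: updated in place, no new storage
  mat.insert(0, 3) = 7.0;     // fine: (0,3) does not exist yet
  // mat.insert(1, 2) = 5.0;  // would trigger the "already exists" assertion
  mat.makeCompressed();
  return 0;
}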
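A usage sketch for the reserve(reserveSizes) path reworked above: per-column reservation followed by sorted insertions and a final compression. The sizes and the per-column estimate of 4 nonzeros are assumptions of the example:

#include <Eigen/SparseCore>

int main() {
  Eigen::SparseMatrix<double> mat(100, 100);
  Eigen::VectorXi reserveSizes = Eigen::VectorXi::Constant(100, 4);  // ~4 nonzeros per column
  mat.reserve(reserveSizes);  // switches to uncompressed mode and pre-allocates room
  for (int j = 0; j < 100; ++j)
    for (int i = j; i < j + 4 && i < 100; ++i)
      mat.insert(i, j) = 1.0;  // fits in the reserved room, no reallocation
  mat.makeCompressed();
  return 0;
}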
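A usage sketch for the rewritten conservativeResize(): nonzeros that still fit in the new dimensions are preserved, entries whose inner index falls outside the new inner size are truncated. Dimensions and values are arbitrary:

#include <Eigen/SparseCore>

int main() {
  Eigen::SparseMatrix<double> mat(4, 4);  // column-major: rows are the inner dimension
  mat.coeffRef(0, 0) = 1.0;
  mat.coeffRef(3, 3) = 2.0;
  mat.conservativeResize(3, 5);  // (0,0) is kept, (3,3) is dropped since row index 3 >= 3
  return 0;
}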
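A usage sketch for the reworked prune(), which now compacts the storage directly instead of calling makeCompressed() first; the reference value and tolerance below are arbitrary:

#include <Eigen/SparseCore>

int main() {
  Eigen::SparseMatrix<double> mat(3, 3);
  mat.coeffRef(0, 0) = 1e-12;  // much smaller than the reference, will be pruned
  mat.coeffRef(1, 1) = 1.0;
  mat.coeffRef(2, 0) = -2.0;
  mat.prune(1.0, 1e-6);  // drops coefficients that are negligible relative to 1.0
  return 0;
}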
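A usage sketch for the assignDiagonal() rework above, i.e. the diagonal-assignment path it implements: existing diagonal coefficients are updated in place and the missing ones are inserted in a single deferred pass. Sizes and values are arbitrary:

#include <Eigen/SparseCore>

int main() {
  Eigen::SparseMatrix<double> mat(5, 5);
  mat.coeffRef(0, 0) = 1.0;  // one diagonal entry already present
  mat.coeffRef(3, 1) = 2.0;  // plus an off-diagonal entry
  Eigen::VectorXd d = Eigen::VectorXd::LinSpaced(5, 1.0, 5.0);
  mat.diagonal() += d;  // (0,0) is updated in place, the other diagonal entries are inserted
  return 0;
}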
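A standalone sketch, using plain standard-library containers and illustrative names, of the two-pass construction that the new set_from_triplets performs on the matrix's own storage: count entries per outer vector, turn the counts into offsets with a prefix sum, then scatter each triplet to the back of its vector:

#include <numeric>
#include <vector>

struct Entry { int row, col; double value; };

// Builds raw CSR arrays from triplets. Duplicate collapsing and per-row sorting,
// handled by collapseDuplicates() and sortInnerIndices() in the patch, are omitted here.
void build_csr(const std::vector<Entry>& triplets, int rows,
               std::vector<int>& outerIndex, std::vector<int>& inner, std::vector<double>& values) {
  outerIndex.assign(rows + 1, 0);
  for (const Entry& t : triplets) outerIndex[t.row + 1]++;          // pass 1: count per row
  std::partial_sum(outerIndex.begin(), outerIndex.end(), outerIndex.begin());
  inner.resize(triplets.size());
  values.resize(triplets.size());
  std::vector<int> back(outerIndex.begin(), outerIndex.end() - 1);  // insertion cursors
  for (const Entry& t : triplets) {                                 // pass 2: scatter
    int dst = back[t.row]++;
    inner[dst] = t.col;
    values[dst] = t.value;
  }
}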
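A minimal usage sketch of the setFromSortedTriplets() API added by this patch; the dimensions, values, and comparator are illustrative. For a column-major matrix the triplets must be ordered by column and then by row, matching the comparator used in the new sparse_basic test:

#include <Eigen/SparseCore>
#include <algorithm>
#include <vector>

int main() {
  typedef Eigen::Triplet<double> T;
  std::vector<T> triplets;
  triplets.push_back(T(0, 0, 1.0));
  triplets.push_back(T(2, 1, 3.0));
  triplets.push_back(T(1, 0, 2.0));
  triplets.push_back(T(2, 1, 4.0));  // duplicate of (2,1), summed by default

  // column-major ordering: by column first, then by row
  std::sort(triplets.begin(), triplets.end(), [](const T& a, const T& b) {
    return (a.col() != b.col()) ? (a.col() < b.col()) : (a.row() < b.row());
  });

  Eigen::SparseMatrix<double> mat(3, 2);
  mat.setFromSortedTriplets(triplets.begin(), triplets.end());
  return 0;
}

As the new test notes, std::stable_sort is only needed when the duplicate functor depends on the order in which duplicates are met (e.g. a keep-last functor); for the default sum functor std::sort is sufficient.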