From 9463fc95f4ddbf2cc81176bbbeae58655dba4eb6 Mon Sep 17 00:00:00 2001 From: Charles Schlosser Date: Wed, 11 Jan 2023 06:24:49 +0000 Subject: [PATCH] change insert strategy --- Eigen/src/SparseCore/SparseMatrix.h | 70 +++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 4dea78829..eaefcefc6 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -92,6 +92,31 @@ struct traits, Dia }; }; +template +struct sparse_reserve_op { + EIGEN_DEVICE_FUNC sparse_reserve_op(Index begin, Index end, Index size) { + Index range = numext::mini(end - begin, size); + m_begin = begin; + m_end = begin + range; + m_val = StorageIndex(size / range); + m_remainder = StorageIndex(size % range); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex operator()(IndexType i) const { + if ((i >= m_begin) && (i < m_end)) + return m_val + ((i - m_begin) < m_remainder ? 1 : 0); + else + return 0; + } + StorageIndex m_val, m_remainder; + Index m_begin, m_end; +}; + +template +struct functor_traits> { + enum { Cost = 1, PacketAccess = false, IsRepeatable = true }; +}; + } // end namespace internal template @@ -971,7 +996,7 @@ protected: assignFunc.assignCoeff(insertBackUncompressed(j, j), diaEval.coeff(j)); // the insertion requires a data move, record insertion location and handle in second pass else { - insertionLocations.coeffRef(j) = dst; + insertionLocations.coeffRef(j) = StorageIndex(dst); deferredInsertions++; // if there is no capacity, all vectors to the right of this are shifted if (capacity == 0) shift++; @@ -1373,10 +1398,8 @@ inline typename SparseMatrix::Scalar& SparseMa template EIGEN_STRONG_INLINE typename SparseMatrix::Scalar& SparseMatrix::insertAtByOuterInner(Index outer, Index inner, Index dst) { - if (isCompressed()) - return insertCompressedAtByOuterInner(outer, inner, dst); - else - return insertUncompressedAtByOuterInner(outer, inner, dst); + uncompress(); + return insertUncompressedAtByOuterInner(outer, inner, dst); } template @@ -1429,7 +1452,8 @@ SparseMatrix::insertCompressedAtByOuterInner(I // shift the existing data to the right if necessary if (chunkSize > 0) data().moveChunk(dst, dst + 1, chunkSize); // update nonzero counts - for (; outer < outerSize(); outer++) outerIndexPtr()[outer + 1]++; + typename IndexVector::AlignedMapType outerIndexMap(outerIndexPtr(), outerSize() + 1); + outerIndexMap.segment(outer + 1, outerSize() - outer).array() += 1; // initialize the coefficient data().index(dst) = StorageIndex(inner); // return a reference to the coefficient @@ -1450,21 +1474,41 @@ SparseMatrix::insertUncompressedAtByOuterInner // `target` has room for interior insertion Index chunkSize = end - dst; // shift the existing data to the right if necessary - if(chunkSize > 0) data().moveChunk(dst, dst + 1, chunkSize); + if (chunkSize > 0) data().moveChunk(dst, dst + 1, chunkSize); break; } } if (target == outerSize()) { - // no room for interior insertion, must expand storage - checkAllocatedSpaceAndMaybeExpand(); - data().resize(data().size() + 1); - // shift the existing data to the right if necessary - Index chunkSize = outerIndexPtr()[outerSize()] - dst; + // no room for interior insertion (to the right of `outer`) + target = outer; + Index dst_offset = dst - outerIndexPtr()[target]; + Index totalCapacity = data().allocatedSize() - data().size(); + eigen_assert(totalCapacity >= 0); + if (totalCapacity == 0) { + // there is no room left. we must reallocate. reserve space in each vector + constexpr StorageIndex kReserveSizePerVector(2); + reserveInnerVectors(IndexVector::Constant(outerSize(), kReserveSizePerVector)); + } else { + // dont reallocate, but re-distribute the remaining capacity to the right of `outer` + // each vector in the range [outer,outerSize) will receive totalCapacity / (outerSize - outer) nonzero + // reservations each vector in the range [outer,remainder) will receive an additional nonzero reservation where + // remainder = totalCapacity % (outerSize - outer) + typedef internal::sparse_reserve_op ReserveSizesOp; + typedef CwiseNullaryOp ReserveSizesXpr; + ReserveSizesXpr reserveSizesXpr(outerSize(), 1, ReserveSizesOp(target, outerSize(), totalCapacity)); + eigen_assert(reserveSizesXpr.sum() == totalCapacity); + reserveInnerVectors(reserveSizesXpr); + } + Index start = outerIndexPtr()[target]; + Index end = start + innerNonZeroPtr()[target]; + dst = start + dst_offset; + Index chunkSize = end - dst; if (chunkSize > 0) data().moveChunk(dst, dst + 1, chunkSize); } // update nonzero counts innerNonZeroPtr()[outer]++; - for (; outer < target; outer++) outerIndexPtr()[outer + 1]++; + typename IndexVector::AlignedMapType outerIndexMap(outerIndexPtr(), outerSize() + 1); + outerIndexMap.segment(outer + 1, target - outer).array() += 1; // initialize the coefficient data().index(dst) = StorageIndex(inner); // return a reference to the coefficient