diff --git a/Eigen/SparseCore b/Eigen/SparseCore index b2db46ba8..352f1ecec 100644 --- a/Eigen/SparseCore +++ b/Eigen/SparseCore @@ -17,6 +17,7 @@ #include #include #include +#include /** * \defgroup SparseCore_Module SparseCore module diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h index 733b1aa73..d89d99ea5 100644 --- a/Eigen/src/SparseCore/CompressedStorage.h +++ b/Eigen/src/SparseCore/CompressedStorage.h @@ -132,15 +132,7 @@ class CompressedStorage /** \returns the largest \c k in [start,end) such that for all \c j in [start,k) index[\c j]\<\a key */ inline Index searchLowerIndex(Index start, Index end, Index key) const { - while(end>start) - { - Index mid = (end+start)>>1; - if (m_indices[mid](std::distance(m_indices, std::lower_bound(m_indices + start, m_indices + end, key))); } /** \returns the stored value at index \a key @@ -198,19 +190,12 @@ class CompressedStorage return m_values[id]; } - void moveChunk(Index from, Index to, Index chunkSize) + inline void moveChunk(Index from, Index to, Index chunkSize) { - eigen_internal_assert(to+chunkSize <= m_size); - if(to>from && from+chunkSize>to) - { - // move backward - internal::smart_memmove(m_values+from, m_values+from+chunkSize, m_values+to); - internal::smart_memmove(m_indices+from, m_indices+from+chunkSize, m_indices+to); - } - else - { - internal::smart_copy(m_values+from, m_values+from+chunkSize, m_values+to); - internal::smart_copy(m_indices+from, m_indices+from+chunkSize, m_indices+to); + eigen_internal_assert(chunkSize >= 0 && to+chunkSize <= m_size); + if (chunkSize > 0) { + internal::smart_memmove(m_values + from, m_values + from + chunkSize, m_values + to); + internal::smart_memmove(m_indices + from, m_indices + from + chunkSize, m_indices + to); } } diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 68068124f..4dea78829 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -196,7 +196,7 @@ class SparseMatrix const Index outer = IsRowMajor ? row : col; const Index inner = IsRowMajor ? col : row; Index end = m_innerNonZeros ? m_outerIndex[outer] + m_innerNonZeros[outer] : m_outerIndex[outer+1]; - return m_data.atInRange(m_outerIndex[outer], end, StorageIndex(inner)); + return m_data.atInRange(m_outerIndex[outer], end, inner); } /** \returns a non-const reference to the value of the matrix at position \a i, \a j @@ -210,20 +210,17 @@ class SparseMatrix inline Scalar& coeffRef(Index row, Index col) { eigen_assert(row>=0 && row=0 && col=start && "you probably called coeffRef on a non finalized matrix"); - if(end<=start) - return insert(row,col); - const Index p = m_data.searchLowerIndex(start,end-1,StorageIndex(inner)); - if((p= start && "you probably called coeffRef on a non finalized matrix"); + if (end <= start) return insertAtByOuterInner(outer, inner, start); + Index dst = m_data.searchLowerIndex(start, end, inner); + if ((dst < end) && (m_data.index(dst) == inner)) + return m_data.value(dst); else - return insert(row,col); + return insertAtByOuterInner(outer, inner, dst); } /** \returns a reference to a novel non zero coefficient with coordinates \a row x \a col. @@ -241,7 +238,7 @@ class SparseMatrix * if the elements of each inner vector are inserted in increasing inner index order, and in O(nnz_j) for a random insertion. 
* */ - Scalar& insert(Index row, Index col); + inline Scalar& insert(Index row, Index col); public: @@ -301,6 +298,11 @@ class SparseMatrix if(isCompressed()) { Index totalReserveSize = 0; + for (Index j = 0; j < m_outerSize; ++j) totalReserveSize += reserveSizes[j]; + + // if reserveSizes is empty, don't do anything! + if (totalReserveSize == 0) return; + // turn the matrix into non-compressed mode m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize); @@ -312,18 +314,18 @@ class SparseMatrix { newOuterIndex[j] = count; count += reserveSizes[j] + (m_outerIndex[j+1]-m_outerIndex[j]); - totalReserveSize += reserveSizes[j]; } + m_data.reserve(totalReserveSize); StorageIndex previousOuterIndex = m_outerIndex[m_outerSize]; for(Index j=m_outerSize-1; j>=0; --j) { StorageIndex innerNNZ = previousOuterIndex - m_outerIndex[j]; - for(Index i=innerNNZ-1; i>=0; --i) - { - m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i); - m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i); - } + StorageIndex begin = m_outerIndex[j]; + StorageIndex end = begin + innerNNZ; + StorageIndex target = newOuterIndex[j]; + internal::smart_memmove(innerIndexPtr() + begin, innerIndexPtr() + end, innerIndexPtr() + target); + internal::smart_memmove(valuePtr() + begin, valuePtr() + end, valuePtr() + target); previousOuterIndex = m_outerIndex[j]; m_outerIndex[j] = newOuterIndex[j]; m_innerNonZeros[j] = innerNNZ; @@ -350,15 +352,15 @@ class SparseMatrix m_data.resize(count); for(Index j=m_outerSize-1; j>=0; --j) { - Index offset = newOuterIndex[j] - m_outerIndex[j]; + StorageIndex offset = newOuterIndex[j] - m_outerIndex[j]; if(offset>0) { StorageIndex innerNNZ = m_innerNonZeros[j]; - for(Index i=innerNNZ-1; i>=0; --i) - { - m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i); - m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i); - } + StorageIndex begin = m_outerIndex[j]; + StorageIndex end = begin + innerNNZ; + StorageIndex target = newOuterIndex[j]; + internal::smart_memmove(innerIndexPtr() + begin, innerIndexPtr() + end, innerIndexPtr() + target); + internal::smart_memmove(valuePtr() + begin, valuePtr() + end, valuePtr() + target); } } @@ -392,7 +394,7 @@ class SparseMatrix { eigen_assert(Index(m_outerIndex[outer+1]) == m_data.size() && "Invalid ordered insertion (invalid outer index)"); eigen_assert( (m_outerIndex[outer+1]-m_outerIndex[outer]==0 || m_data.index(m_data.size()-1) void setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func); - void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } + template + void collapseDuplicates(DenseBase& wi, DupFunctor dup_func = DupFunctor()); - template - void collapseDuplicates(DupFunctor dup_func = DupFunctor()); + template + void setFromSortedTriplets(const InputIterators& begin, const InputIterators& end); + + template + void setFromSortedTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func); //--- @@ -464,27 +470,23 @@ class SparseMatrix */ void makeCompressed() { - if(isCompressed()) - return; + if (isCompressed()) return; eigen_internal_assert(m_outerIndex!=0 && m_outerSize>0); - Index oldStart = m_outerIndex[1]; + StorageIndex start = m_outerIndex[1]; m_outerIndex[1] = m_innerNonZeros[0]; for(Index j=1; j0) + StorageIndex end = start + m_innerNonZeros[j]; + StorageIndex target = m_outerIndex[j]; + if (start != target) { - for(Index k=0; k(m_innerNonZeros, m_outerSize); m_innerNonZeros = 0; @@ -495,13 +497,12 @@ 
class SparseMatrix /** Turns the matrix into the uncompressed mode */ void uncompress() { - if(m_innerNonZeros != 0) - return; + if(m_innerNonZeros != 0) return; m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize); - for (Index i = 0; i < m_outerSize; i++) - { - m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; - } + typename IndexVector::AlignedMapType innerNonZeroMap(m_innerNonZeros, m_outerSize); + typename IndexVector::ConstAlignedMapType outerIndexMap(m_outerIndex, m_outerSize); + typename IndexVector::ConstMapType nextOuterIndexMap(m_outerIndex + 1, m_outerSize); + innerNonZeroMap = nextOuterIndexMap - outerIndexMap; } /** Suppresses all nonzeros which are \b much \b smaller \b than \a reference under the tolerance \a epsilon */ @@ -520,27 +521,32 @@ class SparseMatrix template void prune(const KeepFunc& keep = KeepFunc()) { - // TODO optimize the uncompressed mode to avoid moving and allocating the data twice - makeCompressed(); - StorageIndex k = 0; for(Index j=0; jrows() == rows && this->cols() == cols) return; - + void conservativeResize(Index rows, Index cols) { + // If one dimension is null, then there is nothing to be preserved - if(rows==0 || cols==0) return resize(rows,cols); + if (rows == 0 || cols == 0) return resize(rows, cols); - Index innerChange = IsRowMajor ? cols - this->cols() : rows - this->rows(); - Index outerChange = IsRowMajor ? rows - this->rows() : cols - this->cols(); - StorageIndex newInnerSize = convert_index(IsRowMajor ? cols : rows); + Index newOuterSize = IsRowMajor ? rows : cols; + Index newInnerSize = IsRowMajor ? cols : rows; - // Deals with inner non zeros - if (m_innerNonZeros) - { - // Resize m_innerNonZeros - m_innerNonZeros = internal::conditional_aligned_realloc_new_auto( - m_innerNonZeros, m_outerSize + outerChange, m_outerSize); - - for(Index i=m_outerSize; i(m_outerSize + outerChange); - for(Index i = 0; i < m_outerSize + (std::min)(outerChange, Index(0)); i++) - m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; - for(Index i = m_outerSize; i < m_outerSize + outerChange; i++) - m_innerNonZeros[i] = 0; - } - - // Change the m_innerNonZeros in case of a decrease of inner size - if (m_innerNonZeros && innerChange < 0) - { - for(Index i = 0; i < m_outerSize + (std::min)(outerChange, Index(0)); i++) - { - StorageIndex &n = m_innerNonZeros[i]; - StorageIndex start = m_outerIndex[i]; - while (n > 0 && m_data.index(start+n-1) >= newInnerSize) --n; + Index innerChange = newInnerSize - innerSize(); + Index outerChange = newOuterSize - outerSize(); + + if (outerChange != 0) { + m_outerIndex = internal::conditional_aligned_realloc_new_auto( + outerIndexPtr(), newOuterSize + 1, outerSize() + 1); + + if (!isCompressed()) + m_innerNonZeros = internal::conditional_aligned_realloc_new_auto( + innerNonZeroPtr(), newOuterSize, outerSize()); + + if (outerChange > 0) { + StorageIndex lastIdx = outerSize() == 0 ? StorageIndex(0) : outerIndexPtr()[outerSize()]; + std::fill_n(outerIndexPtr() + outerSize(), outerChange + 1, lastIdx); + + if (!isCompressed()) std::fill_n(innerNonZeroPtr() + outerSize(), outerChange, StorageIndex(0)); + } + } + m_outerSize = newOuterSize; + + if (innerChange < 0) { + for (Index j = 0; j < outerSize(); j++) { + Index start = outerIndexPtr()[j]; + Index end = isCompressed() ? 
outerIndexPtr()[j + 1] : start + innerNonZeroPtr()[j]; + Index lb = data().searchLowerIndex(start, end, newInnerSize); + if (lb != end) { + uncompress(); + innerNonZeroPtr()[j] = StorageIndex(lb - start); + } } } - m_innerSize = newInnerSize; - // Re-allocate outer index structure if necessary - if (outerChange == 0) - return; - - m_outerIndex = internal::conditional_aligned_realloc_new_auto( - m_outerIndex, m_outerSize + outerChange + 1, m_outerSize + 1); - if (outerChange > 0) - { - StorageIndex lastIdx = m_outerSize == 0 ? 0 : m_outerIndex[m_outerSize]; - for(Index i=m_outerSize; im_data.resize(rows()); - Eigen::Map(this->m_data.indexPtr(), rows()).setLinSpaced(0, StorageIndex(rows()-1)); - Eigen::Map(this->m_data.valuePtr(), rows()).setOnes(); - Eigen::Map(this->m_outerIndex, rows()+1).setLinSpaced(0, StorageIndex(rows())); - internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); - m_innerNonZeros = 0; + if (m_innerNonZeros) { + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); + m_innerNonZeros = 0; + } + m_data.resize(rows()); + // is it necessary to squeeze? + m_data.squeeze(); + typename IndexVector::AlignedMapType outerIndexMap(outerIndexPtr(), rows() + 1); + typename IndexVector::AlignedMapType innerIndexMap(innerIndexPtr(), rows()); + typename ScalarVector::AlignedMapType valueMap(valuePtr(), rows()); + outerIndexMap.setEqualSpaced(StorageIndex(0), StorageIndex(1)); + innerIndexMap.setEqualSpaced(StorageIndex(0), StorageIndex(1)); + valueMap.setOnes(); } inline SparseMatrix& operator=(const SparseMatrix& other) @@ -865,7 +864,7 @@ protected: /** \internal * \sa insert(Index,Index) */ - EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col); + EIGEN_DEPRECATED EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col); /** \internal * A vector object that is equal to 0 everywhere but v at the position i */ @@ -884,7 +883,7 @@ protected: /** \internal * \sa insert(Index,Index) */ - EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col); + EIGEN_DEPRECATED EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col); public: /** \internal @@ -913,17 +912,18 @@ protected: * 1 - if *this is overwritten (Func==assign_op) or *this is empty, then we can work treat *this as a dense vector expression. * 2 - otherwise, for each diagonal coeff, * 2.a - if it already exists, then we update it, - * 2.b - otherwise, if *this is uncompressed and that the current inner-vector has empty room for at least 1 element, then we perform an in-place insertion. - * 2.c - otherwise, we'll have to reallocate and copy everything, so instead of doing so for each new element, it is recorded in a std::vector. - * 3 - at the end, if some entries failed to be inserted in-place, then we alloc a new buffer, copy each chunk at the right position, and insert the new elements. - * - * TODO: some piece of code could be isolated and reused for a general in-place update strategy. - * TODO: if we start to defer the insertion of some elements (i.e., case 2.c executed once), - * then it *might* be better to disable case 2.b since they will have to be copied anyway. + * 2.b - if the correct position is at the end of the vector, and there is capacity, push to back + * 2.b - otherwise, the insertion requires a data move, record insertion locations and handle in a second pass + * 3 - at the end, if some entries failed to be updated in-place, then we alloc a new buffer, copy each chunk at the right position, and insert the new elements. 
*/ template void assignDiagonal(const DiagXpr diagXpr, const Func& assignFunc) { + + constexpr StorageIndex kEmptyIndexVal(-1); + typedef typename IndexVector::AlignedMapType IndexMap; + typedef typename ScalarVector::AlignedMapType ValueMap; + Index n = diagXpr.size(); const bool overwrite = internal::is_same >::value; @@ -933,82 +933,106 @@ protected: this->resize(n, n); } - if(m_data.size()==0 || overwrite) + if(data().size()==0 || overwrite) { - typedef Array ArrayXI; - this->makeCompressed(); - this->resizeNonZeros(n); - Eigen::Map(this->innerIndexPtr(), n).setLinSpaced(0,StorageIndex(n)-1); - Eigen::Map(this->outerIndexPtr(), n+1).setLinSpaced(0,StorageIndex(n)); - Eigen::Map > values = this->coeffs(); - values.setZero(); - internal::call_assignment_no_alias(values, diagXpr, assignFunc); + if (!isCompressed()) { + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); + m_innerNonZeros = 0; + } + resizeNonZeros(n); + IndexMap outerIndexMap(outerIndexPtr(), n + 1); + IndexMap innerIndexMap(innerIndexPtr(), n); + ValueMap valueMap(valuePtr(), n); + outerIndexMap.setEqualSpaced(StorageIndex(0), StorageIndex(1)); + innerIndexMap.setEqualSpaced(StorageIndex(0), StorageIndex(1)); + valueMap.setZero(); + internal::call_assignment_no_alias(valueMap, diagXpr, assignFunc); } else { - bool isComp = isCompressed(); - internal::evaluator diaEval(diagXpr); - std::vector newEntries; + internal::evaluator diaEval(diagXpr); - // 1 - try in-place update and record insertion failures - for(Index i = 0; ilower_bound(i,i); - Index p = lb.value; - if(lb.found) - { - // the coeff already exists - assignFunc.assignCoeff(m_data.value(p), diaEval.coeff(i)); + ei_declare_aligned_stack_constructed_variable(StorageIndex, tmp, n, 0); + typename IndexVector::AlignedMapType insertionLocations(tmp, n); + insertionLocations.setConstant(kEmptyIndexVal); + + Index deferredInsertions = 0; + Index shift = 0; + + for (Index j = 0; j < n; j++) { + Index begin = outerIndexPtr()[j]; + Index end = isCompressed() ? outerIndexPtr()[j + 1] : begin + innerNonZeroPtr()[j]; + Index capacity = outerIndexPtr()[j + 1] - end; + Index dst = data().searchLowerIndex(begin, end, j); + // the entry exists: update it now + if (dst != end && data().index(dst) == j) assignFunc.assignCoeff(data().value(dst), diaEval.coeff(j)); + // the entry belongs at the back of the vector: push to back + else if (dst == end && capacity > 0) + assignFunc.assignCoeff(insertBackUncompressed(j, j), diaEval.coeff(j)); + // the insertion requires a data move, record insertion location and handle in second pass + else { + insertionLocations.coeffRef(j) = dst; + deferredInsertions++; + // if there is no capacity, all vectors to the right of this are shifted + if (capacity == 0) shift++; + } } - else if((!isComp) && m_innerNonZeros[i] < (m_outerIndex[i+1]-m_outerIndex[i])) - { - // non compressed mode with local room for inserting one element - m_data.moveChunk(p, p+1, m_outerIndex[i]+m_innerNonZeros[i]-p); - m_innerNonZeros[i]++; - m_data.value(p) = Scalar(0); - m_data.index(p) = StorageIndex(i); - assignFunc.assignCoeff(m_data.value(p), diaEval.coeff(i)); - } - else - { - // defer insertion - newEntries.push_back(IndexPosPair(i,p)); - } - } - // 2 - insert deferred entries - Index n_entries = Index(newEntries.size()); - if(n_entries>0) - { - Storage newData(m_data.size()+n_entries); - Index prev_p = 0; - Index prev_i = 0; - for(Index k=0; k 0) { + + data().resize(data().size() + shift); + Index copyEnd = isCompressed() ? 
outerIndexPtr()[outerSize()] + : outerIndexPtr()[outerSize() - 1] + innerNonZeroPtr()[outerSize() - 1]; + for (Index j = outerSize() - 1; deferredInsertions > 0; j--) { + Index begin = outerIndexPtr()[j]; + Index end = isCompressed() ? outerIndexPtr()[j + 1] : begin + innerNonZeroPtr()[j]; + Index capacity = outerIndexPtr()[j + 1] - end; + + bool doInsertion = insertionLocations(j) >= 0; + bool breakUpCopy = doInsertion && (capacity > 0); + // break up copy for sorted insertion into inactive nonzeros + // optionally, add another criterion, i.e. 'breakUpCopy || (capacity > threshold)' + // where `threshold >= 0` to skip inactive nonzeros in each vector + // this reduces the total number of copied elements, but requires more moveChunk calls + if (breakUpCopy) { + Index copyBegin = outerIndexPtr()[j + 1]; + Index to = copyBegin + shift; + Index chunkSize = copyEnd - copyBegin; + if (chunkSize > 0) data().moveChunk(copyBegin, to, chunkSize); + copyEnd = end; + } + + outerIndexPtr()[j + 1] += shift; + + if (doInsertion) { + // if there is capacity, shift into the inactive nonzeros + if (capacity > 0) shift++; + Index copyBegin = insertionLocations(j); + Index to = copyBegin + shift; + Index chunkSize = copyEnd - copyBegin; + if (chunkSize > 0) data().moveChunk(copyBegin, to, chunkSize); + Index dst = to - 1; + data().index(dst) = StorageIndex(j); + assignFunc.assignCoeff(data().value(dst) = Scalar(0), diaEval.coeff(j)); + if (!isCompressed()) innerNonZeroPtr()[j]++; + shift--; + deferredInsertions--; + copyEnd = copyBegin; + } + } + } + eigen_assert((shift == 0) && (deferredInsertions == 0)); } } + /* Provides a consistent reserve and reallocation strategy for insertCompressed and insertUncompressed + If there is insufficient space for one insertion, the size of the memory buffer is doubled */ + inline void checkAllocatedSpaceAndMaybeExpand(); + /* These functions are used to avoid a redundant binary search operation in functions such as coeffRef() and assume `dst` is the appropriate sorted insertion point */ + EIGEN_STRONG_INLINE Scalar& insertAtByOuterInner(Index outer, Index inner, Index dst); + Scalar& insertCompressedAtByOuterInner(Index outer, Index inner, Index dst); + Scalar& insertUncompressedAtByOuterInner(Index outer, Index inner, Index dst); + private: EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE) EIGEN_STATIC_ASSERT((Options&(ColMajor|RowMajor))==Options,INVALID_MATRIX_TEMPLATE_PARAMETERS) @@ -1026,36 +1050,106 @@ private: namespace internal { -template -void set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, DupFunctor dup_func) -{ - enum { IsRowMajor = SparseMatrixType::IsRowMajor }; - typedef typename SparseMatrixType::Scalar Scalar; +// Creates a compressed sparse matrix from a range of unsorted triplets +// Requires temporary storage to handle duplicate entries +template +void set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, + DupFunctor dup_func) { + + constexpr bool IsRowMajor = SparseMatrixType::IsRowMajor; typedef typename SparseMatrixType::StorageIndex StorageIndex; - SparseMatrix trMat(mat.rows(),mat.cols()); + typedef typename VectorX::AlignedMapType IndexMap; + if (begin == end) return; - if(begin!=end) - { - // pass 1: count the nnz per inner-vector - typename SparseMatrixType::IndexVector wi(trMat.outerSize()); - wi.setZero(); - for(InputIterator it(begin); it!=end; ++it) - { - eigen_assert(it->row()>=0 && it->row()col()>=0 && it->col()col() : 
it->row())++; - } - - // pass 2: insert all the elements into trMat - trMat.reserve(wi); - for(InputIterator it(begin); it!=end; ++it) - trMat.insertBackUncompressed(it->row(),it->col()) = it->value(); - - // pass 3: - trMat.collapseDuplicates(dup_func); + // free innerNonZeroPtr (if present) and zero outerIndexPtr + mat.resize(mat.rows(), mat.cols()); + // allocate temporary storage for nonzero insertion (outer size) and duplicate removal (inner size) + ei_declare_aligned_stack_constructed_variable(StorageIndex, tmp, numext::maxi(mat.innerSize(), mat.outerSize()), 0); + // scan triplets to determine allocation size before constructing matrix + IndexMap outerIndexMap(mat.outerIndexPtr(), mat.outerSize() + 1); + for (InputIterator it(begin); it != end; ++it) { + eigen_assert(it->row() >= 0 && it->row() < mat.rows() && it->col() >= 0 && it->col() < mat.cols()); + StorageIndex j = IsRowMajor ? it->row() : it->col(); + outerIndexMap.coeffRef(j + 1)++; } - // pass 4: transposed copy -> implicit sorting - mat = trMat; + // finalize outer indices and allocate memory + std::partial_sum(outerIndexMap.begin(), outerIndexMap.end(), outerIndexMap.begin()); + Index nonZeros = mat.outerIndexPtr()[mat.outerSize()]; + mat.resizeNonZeros(nonZeros); + + // use tmp to track nonzero insertions + IndexMap back(tmp, mat.outerSize()); + back = outerIndexMap.head(mat.outerSize()); + + // push triplets to back of each inner vector + for (InputIterator it(begin); it != end; ++it) { + StorageIndex j = IsRowMajor ? it->row() : it->col(); + StorageIndex i = IsRowMajor ? it->col() : it->row(); + mat.data().index(back.coeff(j)) = i; + mat.data().value(back.coeff(j)) = it->value(); + back.coeffRef(j)++; + } + + // use tmp to collapse duplicates + IndexMap wi(tmp, mat.innerSize()); + mat.collapseDuplicates(wi, dup_func); + mat.sortInnerIndices(); +} + +// Creates a compressed sparse matrix from a sorted range of triplets +template +void set_from_triplets_sorted(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, + DupFunctor dup_func) { + constexpr bool IsRowMajor = SparseMatrixType::IsRowMajor; + typedef typename SparseMatrixType::StorageIndex StorageIndex; + typedef typename VectorX::AlignedMapType IndexMap; + if (begin == end) return; + + constexpr StorageIndex kEmptyIndexValue(-1); + // deallocate inner nonzeros if present and zero outerIndexPtr + mat.resize(mat.rows(), mat.cols()); + // use outer indices to count non zero entries (excluding duplicate entries) + StorageIndex previous_j = kEmptyIndexValue; + StorageIndex previous_i = kEmptyIndexValue; + // scan triplets to determine allocation size before constructing matrix + IndexMap outerIndexMap(mat.outerIndexPtr(), mat.outerSize() + 1); + for (InputIterator it(begin); it != end; ++it) { + eigen_assert(it->row() >= 0 && it->row() < mat.rows() && it->col() >= 0 && it->col() < mat.cols()); + StorageIndex j = IsRowMajor ? it->row() : it->col(); + StorageIndex i = IsRowMajor ? 
it->col() : it->row(); + eigen_assert(j > previous_j || (j == previous_j && i >= previous_i)); + // identify duplicates by examining previous location + bool duplicate = (previous_j == j) && (previous_i == i); + if (!duplicate) outerIndexMap.coeffRef(j + 1)++; + previous_j = j; + previous_i = i; + } + + // finalize outer indices and allocate memory + std::partial_sum(outerIndexMap.begin(), outerIndexMap.end(), outerIndexMap.begin()); + Index nonZeros = mat.outerIndexPtr()[mat.outerSize()]; + mat.resizeNonZeros(nonZeros); + + previous_i = kEmptyIndexValue; + previous_j = kEmptyIndexValue; + Index back = 0; + for (InputIterator it(begin); it != end; ++it) { + StorageIndex j = IsRowMajor ? it->row() : it->col(); + StorageIndex i = IsRowMajor ? it->col() : it->row(); + bool duplicate = (previous_j == j) && (previous_i == i); + if (duplicate) { + mat.data().value(back - 1) = dup_func(mat.data().value(back - 1), it->value()); + } else { + // push triplets to back + mat.data().index(back) = i; + mat.data().value(back) = it->value(); + previous_j = j; + previous_i = i; + back++; + } + } + // matrix is finalized } } @@ -1105,47 +1199,71 @@ void SparseMatrix::setFromTriplets(const InputIte internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); } +/** The same as setFromTriplets but triplets are assumed to be pre-sorted. This is faster and requires less temporary storage. + * Two triplets `a` and `b` are appropriately ordered if: + * \code + * ColMajor: ((a.col() != b.col()) ? (a.col() < b.col()) : (a.row() < b.row()) + * RowMajor: ((a.row() != b.row()) ? (a.row() < b.row()) : (a.col() < b.col()) + * \endcode + */ +template +template +void SparseMatrix::setFromSortedTriplets(const InputIterators& begin, const InputIterators& end) +{ + internal::set_from_triplets_sorted >(begin, end, *this, internal::scalar_sum_op()); +} + /** The same as setFromTriplets but when duplicates are met the functor \a dup_func is applied: * \code * value = dup_func(OldValue, NewValue) - * \endcode + * \endcode * Here is a C++11 example keeping the latest entry only: * \code * mat.setFromTriplets(triplets.begin(), triplets.end(), [] (const Scalar&,const Scalar &b) { return b; }); * \endcode */ template -template -void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) +template +void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) { - internal::set_from_triplets, DupFunctor>(begin, end, *this, dup_func); + internal::set_from_triplets, DupFunctor>(begin, end, *this, dup_func); +} + +/** The same as setFromSortedTriplets but when duplicates are met the functor \a dup_func is applied: + * \code + * value = dup_func(OldValue, NewValue) + * \endcode + * Here is a C++11 example keeping the latest entry only: + * \code + * mat.setFromTriplets(triplets.begin(), triplets.end(), [] (const Scalar&,const Scalar &b) { return b; }); + * \endcode + */ +template +template +void SparseMatrix::setFromSortedTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func) +{ + internal::set_from_triplets_sorted, DupFunctor>(begin, end, *this, dup_func); } /** \internal */ template -template -void SparseMatrix::collapseDuplicates(DupFunctor dup_func) +template +void SparseMatrix::collapseDuplicates(DenseBase& wi, DupFunctor dup_func) { - eigen_assert(!isCompressed()); - // TODO, in practice we should be able to use m_innerNonZeros for that task - IndexVector wi(innerSize()); - wi.fill(-1); 
+ eigen_assert(wi.size() >= m_innerSize); + constexpr StorageIndex kEmptyIndexValue(-1); + wi.setConstant(kEmptyIndexValue); StorageIndex count = 0; // for each inner-vector, wi[inner_index] will hold the position of first element into the index/value buffers - for(Index j=0; j=start) - { + for (Index j = 0; j < m_outerSize; ++j) { + StorageIndex start = count; + StorageIndex oldEnd = isCompressed() ? m_outerIndex[j + 1] : m_outerIndex[j] + m_innerNonZeros[j]; + for (StorageIndex k = m_outerIndex[j]; k < oldEnd; ++k) { + StorageIndex i = m_data.index(k); + if (wi(i) >= start) { // we already meet this entry => accumulate it m_data.value(wi(i)) = dup_func(m_data.value(wi(i)), m_data.value(k)); - } - else - { + } else { m_data.value(count) = m_data.value(k); m_data.index(count) = m_data.index(k); wi(i) = count; @@ -1155,13 +1273,17 @@ void SparseMatrix::collapseDuplicates(DupFunctor m_outerIndex[j] = start; } m_outerIndex[m_outerSize] = count; - // turn the matrix into compressed form - internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); - m_innerNonZeros = 0; + if (m_innerNonZeros) { + internal::conditional_aligned_delete_auto(m_innerNonZeros, m_outerSize); + m_innerNonZeros = 0; + } m_data.resize(m_outerIndex[m_outerSize]); } + + +/** \internal */ template template EIGEN_DONT_INLINE SparseMatrix& SparseMatrix::operator=(const SparseMatrixBase& other) @@ -1235,274 +1357,130 @@ EIGEN_DONT_INLINE SparseMatrix& SparseMatrix -typename SparseMatrix::Scalar& SparseMatrix::insert(Index row, Index col) -{ - eigen_assert(row>=0 && row=0 && col(m_outerSize); - - std::fill(m_innerNonZeros, m_innerNonZeros + m_outerSize, StorageIndex(0)); - - // pack all inner-vectors to the end of the pre-allocated space - // and allocate the entire free-space to the first inner-vector - StorageIndex end = convert_index(m_data.allocatedSize()); - for(Index j=1; j<=m_outerSize; ++j) - m_outerIndex[j] = end; - } - else - { - // turn the matrix into non-compressed mode - m_innerNonZeros = internal::conditional_aligned_new_auto(m_outerSize); - for(Index j=0; j=0 && m_innerNonZeros[j]==0) - m_outerIndex[j--] = p; - - // push back the new element - ++m_innerNonZeros[outer]; - m_data.append(Scalar(0), inner); - - // check for reallocation - if(data_end != m_data.allocatedSize()) - { - // m_data has been reallocated - // -> move remaining inner-vectors back to the end of the free-space - // so that the entire free-space is allocated to the current inner-vector. - eigen_internal_assert(data_end < m_data.allocatedSize()); - StorageIndex new_end = convert_index(m_data.allocatedSize()); - for(Index k=outer+1; k<=m_outerSize; ++k) - if(m_outerIndex[k]==data_end) - m_outerIndex[k] = new_end; - } - return m_data.value(p); - } - - // Second case: the next inner-vector is packed to the end - // and the current inner-vector end match the used-space. - if(m_outerIndex[outer+1]==data_end && m_outerIndex[outer]+m_innerNonZeros[outer]==m_data.size()) - { - eigen_internal_assert(outer+1==m_outerSize || m_innerNonZeros[outer+1]==0); - - // add space for the new element - ++m_innerNonZeros[outer]; - m_data.resize(m_data.size()+1); - - // check for reallocation - if(data_end != m_data.allocatedSize()) - { - // m_data has been reallocated - // -> move remaining inner-vectors back to the end of the free-space - // so that the entire free-space is allocated to the current inner-vector. 
- eigen_internal_assert(data_end < m_data.allocatedSize()); - StorageIndex new_end = convert_index(m_data.allocatedSize()); - for(Index k=outer+1; k<=m_outerSize; ++k) - if(m_outerIndex[k]==data_end) - m_outerIndex[k] = new_end; - } - - // and insert it at the right position (sorted insertion) - Index startId = m_outerIndex[outer]; - Index p = m_outerIndex[outer]+m_innerNonZeros[outer]-1; - while ( (p > startId) && (m_data.index(p-1) > inner) ) - { - m_data.index(p) = m_data.index(p-1); - m_data.value(p) = m_data.value(p-1); - --p; - } - - m_data.index(p) = convert_index(inner); - return (m_data.value(p) = Scalar(0)); - } - - if(m_data.size() != m_data.allocatedSize()) - { - // make sure the matrix is compatible to random un-compressed insertion: - m_data.resize(m_data.allocatedSize()); - this->reserveInnerVectors(Array::Constant(m_outerSize, 2)); - } - - return insertUncompressed(row,col); +template +inline typename SparseMatrix::Scalar& SparseMatrix::insert( + Index row, Index col) { + Index outer = IsRowMajor ? row : col; + Index inner = IsRowMajor ? col : row; + Index start = outerIndexPtr()[outer]; + Index end = isCompressed() ? outerIndexPtr()[outer + 1] : start + innerNonZeroPtr()[outer]; + Index dst = data().searchLowerIndex(start, end, inner); + eigen_assert((dst == end || data().index(dst) != inner) && + "you cannot insert an element that already exists, you must call coeffRef to this end"); + return insertAtByOuterInner(outer, inner, dst); } - -template -EIGEN_DONT_INLINE typename SparseMatrix::Scalar& SparseMatrix::insertUncompressed(Index row, Index col) -{ + +template +EIGEN_STRONG_INLINE typename SparseMatrix::Scalar& +SparseMatrix::insertAtByOuterInner(Index outer, Index inner, Index dst) { + if (isCompressed()) + return insertCompressedAtByOuterInner(outer, inner, dst); + else + return insertUncompressedAtByOuterInner(outer, inner, dst); +} + +template +EIGEN_DEPRECATED EIGEN_DONT_INLINE typename SparseMatrix::Scalar& +SparseMatrix::insertUncompressed(Index row, Index col) { eigen_assert(!isCompressed()); - - const Index outer = IsRowMajor ? row : col; - const StorageIndex inner = convert_index(IsRowMajor ? col : row); - - Index room = m_outerIndex[outer+1] - m_outerIndex[outer]; - StorageIndex innerNNZ = m_innerNonZeros[outer]; - if(innerNNZ>=room) - { - // this inner vector is full, we need to reallocate the whole buffer :( - reserve(SingletonVector(outer,std::max(2,innerNNZ))); - } - - Index startId = m_outerIndex[outer]; - Index p = startId + m_innerNonZeros[outer]; - while ( (p > startId) && (m_data.index(p-1) > inner) ) - { - m_data.index(p) = m_data.index(p-1); - m_data.value(p) = m_data.value(p-1); - --p; - } - eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exists, you must call coeffRef to this end"); - - m_innerNonZeros[outer]++; - - m_data.index(p) = inner; - return (m_data.value(p) = Scalar(0)); + Index outer = IsRowMajor ? row : col; + Index inner = IsRowMajor ? 
col : row; + Index start = outerIndexPtr()[outer]; + Index end = start + innerNonZeroPtr()[outer]; + Index dst = data().searchLowerIndex(start, end, inner); + eigen_assert((dst == end || data().index(dst) != inner) && + "you cannot insert an element that already exists, you must call coeffRef to this end"); + return insertUncompressedAtByOuterInner(outer, inner, dst); } -template -EIGEN_DONT_INLINE typename SparseMatrix::Scalar& SparseMatrix::insertCompressed(Index row, Index col) -{ +template +EIGEN_DEPRECATED EIGEN_DONT_INLINE typename SparseMatrix::Scalar& +SparseMatrix::insertCompressed(Index row, Index col) { eigen_assert(isCompressed()); + Index outer = IsRowMajor ? row : col; + Index inner = IsRowMajor ? col : row; + Index start = outerIndexPtr()[outer]; + Index end = outerIndexPtr()[outer + 1]; + Index dst = data().searchLowerIndex(start, end, inner); + eigen_assert((dst == end || data().index(dst) != inner) && + "you cannot insert an element that already exists, you must call coeffRef to this end"); + return insertCompressedAtByOuterInner(outer, inner, dst); +} - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - Index previousOuter = outer; - if (m_outerIndex[outer+1]==0) - { - // we start a new inner vector - while (previousOuter>=0 && m_outerIndex[previousOuter]==0) - { - m_outerIndex[previousOuter] = convert_index(m_data.size()); - --previousOuter; - } - m_outerIndex[outer+1] = m_outerIndex[outer]; +template +EIGEN_STRONG_INLINE void SparseMatrix::checkAllocatedSpaceAndMaybeExpand() { + // if there is no capacity for a single insertion, double the capacity + if (data().allocatedSize() <= data().size()) { + // increase capacity by a minimum of 32 + Index minReserve = 32; + Index reserveSize = numext::maxi(minReserve, data().allocatedSize()); + data().reserve(reserveSize); } +} - // here we have to handle the tricky case where the outerIndex array - // starts with: [ 0 0 0 0 0 1 ...] and we are inserted in, e.g., - // the 2nd inner vector... 
- bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0)) - && (std::size_t(m_outerIndex[outer+1]) == m_data.size()); +template +typename SparseMatrix::Scalar& +SparseMatrix::insertCompressedAtByOuterInner(Index outer, Index inner, Index dst) { + eigen_assert(isCompressed()); + // compressed insertion always requires expanding the buffer + checkAllocatedSpaceAndMaybeExpand(); + data().resize(data().size() + 1); + Index chunkSize = outerIndexPtr()[outerSize()] - dst; + // shift the existing data to the right if necessary + if (chunkSize > 0) data().moveChunk(dst, dst + 1, chunkSize); + // update nonzero counts + for (; outer < outerSize(); outer++) outerIndexPtr()[outer + 1]++; + // initialize the coefficient + data().index(dst) = StorageIndex(inner); + // return a reference to the coefficient + return data().value(dst) = Scalar(0); +} - std::size_t startId = m_outerIndex[outer]; - // FIXME let's make sure sizeof(long int) == sizeof(std::size_t) - std::size_t p = m_outerIndex[outer+1]; - ++m_outerIndex[outer+1]; - - double reallocRatio = 1; - if (m_data.allocatedSize()<=m_data.size()) - { - // if there is no preallocated memory, let's reserve a minimum of 32 elements - if (m_data.size()==0) - { - m_data.reserve(32); - } - else - { - // we need to reallocate the data, to reduce multiple reallocations - // we use a smart resize algorithm based on the current filling ratio - // in addition, we use double to avoid integers overflows - double nnzEstimate = double(m_outerIndex[outer])*double(m_outerSize)/double(outer+1); - reallocRatio = (nnzEstimate-double(m_data.size()))/double(m_data.size()); - // furthermore we bound the realloc ratio to: - // 1) reduce multiple minor realloc when the matrix is almost filled - // 2) avoid to allocate too much memory when the matrix is almost empty - reallocRatio = (std::min)((std::max)(reallocRatio,1.5),8.); +template +typename SparseMatrix::Scalar& +SparseMatrix::insertUncompressedAtByOuterInner(Index outer, Index inner, Index dst) { + eigen_assert(!isCompressed()); + // find nearest outer vector to the right with capacity (if any) to minimize copy size + Index target = outer; + for (; target < outerSize(); target++) { + Index start = outerIndexPtr()[target]; + Index end = start + innerNonZeroPtr()[target]; + Index capacity = outerIndexPtr()[target + 1] - end; + if (capacity > 0) { + // `target` has room for interior insertion + Index chunkSize = end - dst; + // shift the existing data to the right if necessary + if(chunkSize > 0) data().moveChunk(dst, dst + 1, chunkSize); + break; } } - m_data.resize(m_data.size()+1,reallocRatio); - - if (!isLastVec) - { - if (previousOuter==-1) - { - // oops wrong guess. 
- // let's correct the outer offsets - for (Index k=0; k<=(outer+1); ++k) - m_outerIndex[k] = 0; - Index k=outer+1; - while(m_outerIndex[k]==0) - m_outerIndex[k++] = 1; - while (k<=m_outerSize && m_outerIndex[k]!=0) - m_outerIndex[k++]++; - p = 0; - --k; - k = m_outerIndex[k]-1; - while (k>0) - { - m_data.index(k) = m_data.index(k-1); - m_data.value(k) = m_data.value(k-1); - k--; - } - } - else - { - // we are not inserting into the last inner vec - // update outer indices: - Index j = outer+2; - while (j<=m_outerSize && m_outerIndex[j]!=0) - m_outerIndex[j++]++; - --j; - // shift data of last vecs: - Index k = m_outerIndex[j]-1; - while (k>=Index(p)) - { - m_data.index(k) = m_data.index(k-1); - m_data.value(k) = m_data.value(k-1); - k--; - } - } + if (target == outerSize()) { + // no room for interior insertion, must expand storage + checkAllocatedSpaceAndMaybeExpand(); + data().resize(data().size() + 1); + // shift the existing data to the right if necessary + Index chunkSize = outerIndexPtr()[outerSize()] - dst; + if (chunkSize > 0) data().moveChunk(dst, dst + 1, chunkSize); } - - while ( (p > startId) && (m_data.index(p-1) > inner) ) - { - m_data.index(p) = m_data.index(p-1); - m_data.value(p) = m_data.value(p-1); - --p; - } - - m_data.index(p) = inner; - return (m_data.value(p) = Scalar(0)); + // update nonzero counts + innerNonZeroPtr()[outer]++; + for (; outer < target; outer++) outerIndexPtr()[outer + 1]++; + // initialize the coefficient + data().index(dst) = StorageIndex(inner); + // return a reference to the coefficient + return data().value(dst) = Scalar(0); } namespace internal { -template -struct evaluator > - : evaluator > > -{ - typedef evaluator > > Base; - typedef SparseMatrix SparseMatrixType; - evaluator() : Base() {} - explicit evaluator(const SparseMatrixType &mat) : Base(mat) {} -}; + template + struct evaluator> + : evaluator>> { + typedef evaluator>> Base; + typedef SparseMatrix SparseMatrixType; + evaluator() : Base() {} + explicit evaluator(const SparseMatrixType& mat) : Base(mat) {} + }; } diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 52ff7b70d..6e2661e8d 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -477,14 +477,45 @@ template void sparse_basic(const SparseMatrixType& re refMat_prod(r,c) *= v; refMat_last(r,c) = v; } + SparseMatrixType m(rows,cols); m.setFromTriplets(triplets.begin(), triplets.end()); VERIFY_IS_APPROX(m, refMat_sum); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); m.setFromTriplets(triplets.begin(), triplets.end(), std::multiplies()); VERIFY_IS_APPROX(m, refMat_prod); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); m.setFromTriplets(triplets.begin(), triplets.end(), [] (Scalar,Scalar b) { return b; }); VERIFY_IS_APPROX(m, refMat_last); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); + + // test setFromSortedTriplets + + struct triplet_comp { + inline bool operator()(const TripletType& a, const TripletType& b) { + return SparseMatrixType::IsRowMajor ? ((a.row() != b.row()) ? (a.row() < b.row()) : (a.col() < b.col())) + : ((a.col() != b.col()) ? 
(a.col() < b.col()) : (a.row() < b.row())); + } + }; + + // stable_sort is only necessary when the reduction functor is dependent on the order of the triplets + // this is the case with refMat_last + // for most cases, std::sort is sufficient and preferred + std::stable_sort(triplets.begin(), triplets.end(), triplet_comp()); + + m.setZero(); + m.setFromSortedTriplets(triplets.begin(), triplets.end()); + VERIFY_IS_APPROX(m, refMat_sum); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); + + m.setFromSortedTriplets(triplets.begin(), triplets.end(), std::multiplies()); + VERIFY_IS_APPROX(m, refMat_prod); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); + + m.setFromSortedTriplets(triplets.begin(), triplets.end(), [](Scalar, Scalar b) { return b; }); + VERIFY_IS_APPROX(m, refMat_last); + VERIFY_IS_EQUAL(m.innerIndicesAreSorted(), m.outerSize()); } // test Map
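The simplified CompressedStorage::searchLowerIndex above reduces to a single std::lower_bound call over the index array. A standalone sketch of the equivalent helper, using illustrative names and plain pointers rather than the patch's exact signature:

#include <algorithm>
#include <iterator>

// Returns the first position in [start, end) whose stored index is not smaller than key,
// mirroring the lower_bound-based rewrite of searchLowerIndex.
template <typename IndexT>
IndexT search_lower_index(const IndexT* indices, IndexT start, IndexT end, IndexT key) {
  return static_cast<IndexT>(
      std::distance(indices, std::lower_bound(indices + start, indices + end, key)));
}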
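A short usage sketch of the coeffRef()/insert() contract that the rewritten accessors rely on; the matrix size and values are arbitrary. insert() asserts that the coefficient does not exist yet, while coeffRef() falls back to an insertion through insertAtByOuterInner() when the binary search misses:

#include <Eigen/SparseCore>

int main() {
  Eigen::SparseMatrix<double> mat(4, 4);  // empty, compressed
  mat.coeffRef(1, 2) = 3.0;   // (1,2) is absent: coeffRef inserts it, then assigns
  mat.coeffRef(1, 2) += 1.0;  // (1,2) now exists: updated in place, no new storage
  mat.insert(0, 3) = 7.0;     // fine: (0,3) does not exist yet
  // mat.insert(1, 2) = 5.0;  // would trigger the "already exists" assertion
  mat.makeCompressed();
  return 0;
}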
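A usage sketch for the reserve(reserveSizes) path reworked above: per-column reservation followed by sorted insertions and a final compression. The sizes and the per-column estimate of 4 nonzeros are assumptions of the example:

#include <Eigen/SparseCore>

int main() {
  Eigen::SparseMatrix<double> mat(100, 100);
  Eigen::VectorXi reserveSizes = Eigen::VectorXi::Constant(100, 4);  // ~4 nonzeros per column
  mat.reserve(reserveSizes);  // switches to uncompressed mode and pre-allocates room
  for (int j = 0; j < 100; ++j)
    for (int i = j; i < j + 4 && i < 100; ++i)
      mat.insert(i, j) = 1.0;  // fits in the reserved room, no reallocation
  mat.makeCompressed();
  return 0;
}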
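A usage sketch for the rewritten conservativeResize(): nonzeros that still fit in the new dimensions are preserved, entries whose inner index falls outside the new inner size are truncated. Dimensions and values are arbitrary:

#include <Eigen/SparseCore>

int main() {
  Eigen::SparseMatrix<double> mat(4, 4);  // column-major: rows are the inner dimension
  mat.coeffRef(0, 0) = 1.0;
  mat.coeffRef(3, 3) = 2.0;
  mat.conservativeResize(3, 5);  // (0,0) is kept, (3,3) is dropped since row index 3 >= 3
  return 0;
}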
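A usage sketch for the reworked prune(), which now compacts the storage directly instead of calling makeCompressed() first; the reference value and tolerance below are arbitrary:

#include <Eigen/SparseCore>

int main() {
  Eigen::SparseMatrix<double> mat(3, 3);
  mat.coeffRef(0, 0) = 1e-12;  // much smaller than the reference, will be pruned
  mat.coeffRef(1, 1) = 1.0;
  mat.coeffRef(2, 0) = -2.0;
  mat.prune(1.0, 1e-6);  // drops coefficients that are negligible relative to 1.0
  return 0;
}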
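A usage sketch for the assignDiagonal() rework above, i.e. the diagonal-assignment path it implements: existing diagonal coefficients are updated in place and the missing ones are inserted in a single deferred pass. Sizes and values are arbitrary:

#include <Eigen/SparseCore>

int main() {
  Eigen::SparseMatrix<double> mat(5, 5);
  mat.coeffRef(0, 0) = 1.0;  // one diagonal entry already present
  mat.coeffRef(3, 1) = 2.0;  // plus an off-diagonal entry
  Eigen::VectorXd d = Eigen::VectorXd::LinSpaced(5, 1.0, 5.0);
  mat.diagonal() += d;  // (0,0) is updated in place, the other diagonal entries are inserted
  return 0;
}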
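A standalone sketch, using plain standard-library containers and illustrative names, of the two-pass construction that the new set_from_triplets performs on the matrix's own storage: count entries per outer vector, turn the counts into offsets with a prefix sum, then scatter each triplet to the back of its vector:

#include <numeric>
#include <vector>

struct Entry { int row, col; double value; };

// Builds raw CSR arrays from triplets. Duplicate collapsing and per-row sorting,
// handled by collapseDuplicates() and sortInnerIndices() in the patch, are omitted here.
void build_csr(const std::vector<Entry>& triplets, int rows,
               std::vector<int>& outerIndex, std::vector<int>& inner, std::vector<double>& values) {
  outerIndex.assign(rows + 1, 0);
  for (const Entry& t : triplets) outerIndex[t.row + 1]++;          // pass 1: count per row
  std::partial_sum(outerIndex.begin(), outerIndex.end(), outerIndex.begin());
  inner.resize(triplets.size());
  values.resize(triplets.size());
  std::vector<int> back(outerIndex.begin(), outerIndex.end() - 1);  // insertion cursors
  for (const Entry& t : triplets) {                                 // pass 2: scatter
    int dst = back[t.row]++;
    inner[dst] = t.col;
    values[dst] = t.value;
  }
}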
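A minimal usage sketch of the setFromSortedTriplets() API added by this patch; the dimensions, values, and comparator are illustrative. For a column-major matrix the triplets must be ordered by column and then by row, matching the comparator used in the new sparse_basic test:

#include <Eigen/SparseCore>
#include <algorithm>
#include <vector>

int main() {
  typedef Eigen::Triplet<double> T;
  std::vector<T> triplets;
  triplets.push_back(T(0, 0, 1.0));
  triplets.push_back(T(2, 1, 3.0));
  triplets.push_back(T(1, 0, 2.0));
  triplets.push_back(T(2, 1, 4.0));  // duplicate of (2,1), summed by default

  // column-major ordering: by column first, then by row
  std::sort(triplets.begin(), triplets.end(), [](const T& a, const T& b) {
    return (a.col() != b.col()) ? (a.col() < b.col()) : (a.row() < b.row());
  });

  Eigen::SparseMatrix<double> mat(3, 2);
  mat.setFromSortedTriplets(triplets.begin(), triplets.end());
  return 0;
}

As the new test notes, std::stable_sort is only needed when the duplicate functor depends on the order in which duplicates are met (e.g. a keep-last functor); for the default sum functor std::sort is sufficient.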