From 5b1d0cebc5c30975f6f392d6f7d49d35e2a3508f Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sun, 5 Apr 2009 16:30:10 +0000
Subject: [PATCH] sparse module: new API proposal for triangular solves and
 experimental solver support with a sparse matrix as the rhs.

---
 Eigen/Sparse                        |   1 +
 Eigen/src/Sparse/AmbiVector.h       |  15 +-
 Eigen/src/Sparse/SparseBlock.h      |  36 ++++-
 Eigen/src/Sparse/SparseLDLT.h       |   8 +-
 Eigen/src/Sparse/SparseLLT.h        |   6 +-
 Eigen/src/Sparse/SparseMatrix.h     |   1 +
 Eigen/src/Sparse/SparseMatrixBase.h |   7 +
 Eigen/src/Sparse/SparseTriangular.h |  60 ++++++++
 Eigen/src/Sparse/SparseUtil.h       |   1 +
 Eigen/src/Sparse/SuperLUSupport.h   |   2 +-
 Eigen/src/Sparse/TriangularSolver.h | 218 +++++++++++++++++++++++-----
 test/sparse_product.cpp             |   6 +-
 test/sparse_solvers.cpp             |  38 ++++-
 13 files changed, 335 insertions(+), 64 deletions(-)
 create mode 100644 Eigen/src/Sparse/SparseTriangular.h
diff --git a/Eigen/Sparse b/Eigen/Sparse
index 6de24d34d..b0f2b2a12 100644
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -104,6 +104,7 @@ namespace Eigen {
 #include "src/Sparse/SparseFlagged.h"
 #include "src/Sparse/SparseProduct.h"
 #include "src/Sparse/SparseDiagonalProduct.h"
+#include "src/Sparse/SparseTriangular.h"
 #include "src/Sparse/TriangularSolver.h"
 #include "src/Sparse/SparseLLT.h"
 #include "src/Sparse/SparseLDLT.h"
diff --git a/Eigen/src/Sparse/AmbiVector.h b/Eigen/src/Sparse/AmbiVector.h
index 75001a2fa..e7a7c8784 100644
--- a/Eigen/src/Sparse/AmbiVector.h
+++ b/Eigen/src/Sparse/AmbiVector.h
@@ -36,7 +36,7 @@ template<typename _Scalar> class AmbiVector
     typedef _Scalar Scalar;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     AmbiVector(int size)
-      : m_buffer(0), m_size(0), m_allocatedSize(0), m_allocatedElements(0), m_mode(-1)
+      : m_buffer(0), m_zero(0), m_size(0), m_allocatedSize(0), m_allocatedElements(0), m_mode(-1)
     {
       resize(size);
     }
@@ -44,7 +44,7 @@ template<typename _Scalar> class AmbiVector
     void init(RealScalar estimatedDensity);
     void init(int mode);
 
-    void nonZeros() const;
+    int nonZeros() const;
 
     /** Specifies a sub-vector to work on */
     void setBounds(int start, int end) { m_start = start; m_end = end; }
@@ -53,7 +53,7 @@ template<typename _Scalar> class AmbiVector
 
     void restart();
     Scalar& coeffRef(int i);
-    Scalar coeff(int i);
+    Scalar& coeff(int i);
 
     class Iterator;
 
@@ -112,6 +112,7 @@ template<typename _Scalar> class AmbiVector
 
     // used to store data in both mode
     Scalar* m_buffer;
+    Scalar m_zero;
     int m_size;
     int m_start;
     int m_end;
@@ -131,7 +132,7 @@ template<typename _Scalar> class AmbiVector
 
 /** \returns the number of non zeros in the current sub vector */
 template<typename Scalar>
-void AmbiVector<Scalar>::nonZeros() const
+int AmbiVector<Scalar>::nonZeros() const
 {
   if (m_mode==IsSparse)
     return m_llSize;
@@ -254,7 +255,7 @@ Scalar& AmbiVector<Scalar>::coeffRef(int i)
 }
 
 template<typename Scalar>
-Scalar AmbiVector<Scalar>::coeff(int i)
+Scalar& AmbiVector<Scalar>::coeff(int i)
 {
   if (m_mode==IsDense)
     return m_buffer[i];
@@ -264,7 +265,7 @@ Scalar AmbiVector<Scalar>::coeff(int i)
     ei_assert(m_mode==IsSparse);
     if ((m_llSize==0) || (i<llElements[m_llStart].index))
     {
-      return Scalar(0);
+      return m_zero;
     }
     else
     {
@@ -275,7 +276,7 @@ Scalar AmbiVector<Scalar>::coeff(int i)
       if (llElements[elid].index==i)
         return llElements[m_llCurrent].value;
       else
-        return Scalar(0);
+        return m_zero;
     }
   }
 }
diff --git a/Eigen/src/Sparse/SparseBlock.h b/Eigen/src/Sparse/SparseBlock.h
index c39066676..86567f1a5 100644
--- a/Eigen/src/Sparse/SparseBlock.h
+++ b/Eigen/src/Sparse/SparseBlock.h
@@ -150,6 +150,21 @@ class SparseInnerVectorSet<DynamicSparseMatrix<_Scalar, _Options>, Size>
     {
       return operator=<SparseInnerVectorSet>(other);
     }
+    
+    int nonZeros() const
+    { 
+      int count = 0;
+      for (int j=0; j<m_outerSize; ++j)
+        count += m_matrix._data()[m_outerStart+j].size();
+      return count;
+    }
+    
+    const Scalar& lastCoeff() const
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(SparseInnerVectorSet);
+      ei_assert(m_matrix.data()[m_outerStart].size()>0);
+      return m_matrix.data()[m_outerStart].vale(m_matrix.data()[m_outerStart].size()-1);
+    }
 
 //     template<typename Sparse>
 //     inline SparseInnerVectorSet& operator=(const SparseMatrixBase<OtherDerived>& other)
@@ -172,12 +187,12 @@ class SparseInnerVectorSet<DynamicSparseMatrix<_Scalar, _Options>, Size>
 /***************************************************************************
 * specialisation for SparseMatrix
 ***************************************************************************/
-/*
+
 template<typename _Scalar, int _Options, int Size>
 class SparseInnerVectorSet<SparseMatrix<_Scalar, _Options>, Size>
   : public SparseMatrixBase<SparseInnerVectorSet<SparseMatrix<_Scalar, _Options>, Size> >
 {
-    typedef DynamicSparseMatrix<_Scalar, _Options> MatrixType;
+    typedef SparseMatrix<_Scalar, _Options> MatrixType;
     enum { IsRowMajor = ei_traits<SparseInnerVectorSet>::IsRowMajor };
   public:
 
@@ -233,7 +248,20 @@ class SparseInnerVectorSet<SparseMatrix<_Scalar, _Options>, Size>
     { return m_matrix._valuePtr() + m_matrix._outerIndexPtr()[m_outerStart]; }
     inline const int* _innerIndexPtr() const
     { return m_matrix._innerIndexPtr() + m_matrix._outerIndexPtr()[m_outerStart]; }
-    inline const int* _outerIndexPtr() const { return m_matrix._outerIndexPtr() + m_outerStart; }
+    inline const int* _outerIndexPtr() const
+    { return m_matrix._outerIndexPtr() + m_outerStart; }
+    
+    int nonZeros() const
+    {
+      return  size_t(m_matrix._outerIndexPtr()[m_outerStart+m_outerSize.value()])
+            - size_t(m_matrix._outerIndexPtr()[m_outerStart]); }
+    
+    const Scalar& lastCoeff() const
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(SparseInnerVectorSet);
+      ei_assert(nonZeros()>0);
+      return m_matrix._valuePtr()[m_matrix._outerIndexPtr()[m_outerStart+1]-1];
+    }
 
 //     template<typename Sparse>
 //     inline SparseInnerVectorSet& operator=(const SparseMatrixBase<OtherDerived>& other)
@@ -251,7 +279,7 @@ class SparseInnerVectorSet<SparseMatrix<_Scalar, _Options>, Size>
     const ei_int_if_dynamic<Size> m_outerSize;
 
 };
-*/
+
 //----------
 
 /** \returns the i-th row of the matrix \c *this. For row-major matrix only. */
diff --git a/Eigen/src/Sparse/SparseLDLT.h b/Eigen/src/Sparse/SparseLDLT.h
index a1bac4d08..80d67ec8a 100644
--- a/Eigen/src/Sparse/SparseLDLT.h
+++ b/Eigen/src/Sparse/SparseLDLT.h
@@ -79,7 +79,7 @@ class SparseLDLT
   protected:
     typedef typename MatrixType::Scalar Scalar;
     typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
-    typedef SparseMatrix<Scalar,LowerTriangular|UnitDiagBit> CholMatrixType;
+    typedef SparseMatrix<Scalar> CholMatrixType;
     typedef Matrix<Scalar,MatrixType::ColsAtCompileTime,1> VectorType;
 
     enum {
@@ -137,7 +137,7 @@ class SparseLDLT
       *                              overloads the MemoryEfficient flags)
       *
       * \sa flags() */
-    void settagss(int f) { m_flags = f; }
+    void settags(int f) { m_flags = f; }
     /** \returns the current flags */
     int flags() const { return m_flags; }
 
@@ -333,12 +333,12 @@ bool SparseLDLT<MatrixType, Backend>::solveInPlace(MatrixBase<Derived> &b) const
     return false;
 
   if (m_matrix.nonZeros()>0) // otherwise L==I
-    m_matrix.solveTriangularInPlace(b);
+    m_matrix.template triangular<LowerTriangular|UnitDiagBit>().solveInPlace(b);
   b = b.cwise() / m_diag;
   // FIXME should be .adjoint() but it fails to compile...
 
   if (m_matrix.nonZeros()>0) // otherwise L==I
-  m_matrix.transpose().solveTriangularInPlace(b);
+    m_matrix.transpose().template triangular<UpperTriangular|UnitDiagBit>().solveInPlace(b);
 
   return true;
 }
diff --git a/Eigen/src/Sparse/SparseLLT.h b/Eigen/src/Sparse/SparseLLT.h
index 864c4415a..40cefa189 100644
--- a/Eigen/src/Sparse/SparseLLT.h
+++ b/Eigen/src/Sparse/SparseLLT.h
@@ -191,15 +191,15 @@ bool SparseLLT<MatrixType, Backend>::solveInPlace(MatrixBase<Derived> &b) const
   const int size = m_matrix.rows();
   ei_assert(size==b.rows());
 
-  m_matrix.template triangular<LowerTriangular>.solveInPlace(b);
+  m_matrix.template triangular<LowerTriangular>().solveInPlace(b);
   // FIXME should be simply .adjoint() but it fails to compile...
   if (NumTraits<Scalar>::IsComplex)
   {
     CholMatrixType aux = m_matrix.conjugate();
-    aux.transpose().template triangular<UpperTriangular>.solveInPlace(b);
+    aux.transpose().template triangular<UpperTriangular>().solveInPlace(b);
   }
   else
-    m_matrix.transpose().template triangular<UpperTriangular>.solveInPlace(b);
+    m_matrix.transpose().template triangular<UpperTriangular>().solveInPlace(b);
 
   return true;
 }
diff --git a/Eigen/src/Sparse/SparseMatrix.h b/Eigen/src/Sparse/SparseMatrix.h
index 3f09596bc..ad94e63ba 100644
--- a/Eigen/src/Sparse/SparseMatrix.h
+++ b/Eigen/src/Sparse/SparseMatrix.h
@@ -164,6 +164,7 @@ class SparseMatrix
       {
         ei_assert(m_data.index(m_data.size()-1)<inner && "wrong sorted insertion");
       }
+//       std::cerr << size_t(m_outerIndex[outer+1]) << " == " << m_data.size() << "\n";
       assert(size_t(m_outerIndex[outer+1]) == m_data.size());
       int id = m_outerIndex[outer+1];
       ++m_outerIndex[outer+1];
diff --git a/Eigen/src/Sparse/SparseMatrixBase.h b/Eigen/src/Sparse/SparseMatrixBase.h
index f20841ce1..e611744d4 100644
--- a/Eigen/src/Sparse/SparseMatrixBase.h
+++ b/Eigen/src/Sparse/SparseMatrixBase.h
@@ -308,12 +308,19 @@ template<typename Derived> class SparseMatrixBase
     template<typename OtherDerived>
     Derived& operator*=(const SparseMatrixBase<OtherDerived>& other);
 
+    // deprecated
     template<typename OtherDerived>
     typename ei_plain_matrix_type_column_major<OtherDerived>::type
     solveTriangular(const MatrixBase<OtherDerived>& other) const;
 
+    // deprecated
     template<typename OtherDerived>
     void solveTriangularInPlace(MatrixBase<OtherDerived>& other) const;
+//     template<typename OtherDerived>
+//     void solveTriangularInPlace(SparseMatrixBase<OtherDerived>& other) const;
+
+    template<int Mode>
+    inline const SparseTriangular<Derived, Mode> triangular() const;
 
     template<typename OtherDerived> Scalar dot(const MatrixBase<OtherDerived>& other) const;
     template<typename OtherDerived> Scalar dot(const SparseMatrixBase<OtherDerived>& other) const;
diff --git a/Eigen/src/Sparse/SparseTriangular.h b/Eigen/src/Sparse/SparseTriangular.h
new file mode 100644
index 000000000..ca16caf96
--- /dev/null
+++ b/Eigen/src/Sparse/SparseTriangular.h
@@ -0,0 +1,60 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_SPARSE_TRIANGULAR_H
+#define EIGEN_SPARSE_TRIANGULAR_H
+
+template<typename ExpressionType, int Mode> class SparseTriangular
+{
+  public:
+
+    typedef typename ei_traits<ExpressionType>::Scalar Scalar;
+    typedef typename ei_meta_if<ei_must_nest_by_value<ExpressionType>::ret,
+        ExpressionType, const ExpressionType&>::ret ExpressionTypeNested;
+    typedef CwiseUnaryOp<ei_scalar_add_op<Scalar>, ExpressionType> ScalarAddReturnType;
+
+    inline SparseTriangular(const ExpressionType& matrix) : m_matrix(matrix) {}
+
+    /** \internal */
+    inline const ExpressionType& _expression() const { return m_matrix; }
+
+    template<typename OtherDerived>
+    typename ei_plain_matrix_type_column_major<OtherDerived>::type
+    solve(const MatrixBase<OtherDerived>& other) const;
+    template<typename OtherDerived> void solveInPlace(MatrixBase<OtherDerived>& other) const;
+    template<typename OtherDerived> void solveInPlace(SparseMatrixBase<OtherDerived>& other) const;
+    
+  protected:
+    ExpressionTypeNested m_matrix;
+};
+
+template<typename Derived>
+template<int Mode>
+inline const SparseTriangular<Derived, Mode>
+SparseMatrixBase<Derived>::triangular() const
+{
+  return derived();
+}
+
+#endif // EIGEN_SPARSE_TRIANGULAR_H
diff --git a/Eigen/src/Sparse/SparseUtil.h b/Eigen/src/Sparse/SparseUtil.h
index 393cdda6e..6610bdf30 100644
--- a/Eigen/src/Sparse/SparseUtil.h
+++ b/Eigen/src/Sparse/SparseUtil.h
@@ -113,6 +113,7 @@ template<typename UnaryOp,   typename MatrixType>        class SparseCwiseUnaryO
 template<typename BinaryOp,  typename Lhs, typename Rhs> class SparseCwiseBinaryOp;
 template<typename ExpressionType,
          unsigned int Added, unsigned int Removed>       class SparseFlagged;
+template<typename ExpressionType, int Mode>              class SparseTriangular;
 template<typename Lhs, typename Rhs>                     class SparseDiagonalProduct;
 
 template<typename Lhs, typename Rhs> struct ei_sparse_product_mode;
diff --git a/Eigen/src/Sparse/SuperLUSupport.h b/Eigen/src/Sparse/SuperLUSupport.h
index 3c9a4fcce..cf0f802ed 100644
--- a/Eigen/src/Sparse/SuperLUSupport.h
+++ b/Eigen/src/Sparse/SuperLUSupport.h
@@ -543,7 +543,7 @@ typename SparseLU<MatrixType,SuperLU>::Scalar SparseLU<MatrixType,SuperLU>::dete
   if (m_extractedDataAreDirty)
     extractData();
 
-  // TODO this code coule be moved to the default/base backend
+  // TODO this code could be moved to the default/base backend
   // FIXME perhaps we have to take into account the scale factors m_sluRscale and m_sluCscale ???
   Scalar det = Scalar(1);
   for (int j=0; j<m_u.cols(); ++j)
diff --git a/Eigen/src/Sparse/TriangularSolver.h b/Eigen/src/Sparse/TriangularSolver.h
index 8948ae45e..168439b30 100644
--- a/Eigen/src/Sparse/TriangularSolver.h
+++ b/Eigen/src/Sparse/TriangularSolver.h
@@ -25,9 +25,18 @@
 #ifndef EIGEN_SPARSETRIANGULARSOLVER_H
 #define EIGEN_SPARSETRIANGULARSOLVER_H
 
+template<typename Lhs, typename Rhs, int Mode,
+  int UpLo = (Mode & LowerTriangularBit)
+           ? LowerTriangular
+           : (Mode & UpperTriangularBit)
+           ? UpperTriangular
+           : -1,
+  int StorageOrder = int(ei_traits<Lhs>::Flags) & RowMajorBit>
+struct ei_sparse_solve_triangular_selector;
+
 // forward substitution, row-major
-template<typename Lhs, typename Rhs>
-struct ei_solve_triangular_selector<Lhs,Rhs,LowerTriangular,RowMajor|IsSparse>
+template<typename Lhs, typename Rhs, int Mode>
+struct ei_sparse_solve_triangular_selector<Lhs,Rhs,Mode,LowerTriangular,RowMajor>
 {
   typedef typename Rhs::Scalar Scalar;
   static void run(const Lhs& lhs, Rhs& other)
@@ -45,7 +54,7 @@ struct ei_solve_triangular_selector<Lhs,Rhs,LowerTriangular,RowMajor|IsSparse>
           lastIndex = it.index();
           tmp -= lastVal * other.coeff(lastIndex,col);
         }
-        if (Lhs::Flags & UnitDiagBit)
+        if (Mode & UnitDiagBit)
           other.coeffRef(i,col) = tmp;
         else
         {
@@ -58,8 +67,8 @@ struct ei_solve_triangular_selector<Lhs,Rhs,LowerTriangular,RowMajor|IsSparse>
 };
 
 // backward substitution, row-major
-template<typename Lhs, typename Rhs>
-struct ei_solve_triangular_selector<Lhs,Rhs,UpperTriangular,RowMajor|IsSparse>
+template<typename Lhs, typename Rhs, int Mode>
+struct ei_sparse_solve_triangular_selector<Lhs,Rhs,Mode,UpperTriangular,RowMajor>
 {
   typedef typename Rhs::Scalar Scalar;
   static void run(const Lhs& lhs, Rhs& other)
@@ -77,7 +86,7 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpperTriangular,RowMajor|IsSparse>
           tmp -= it.value() * other.coeff(it.index(),col);
         }
 
-        if (Lhs::Flags & UnitDiagBit)
+        if (Mode & UnitDiagBit)
           other.coeffRef(i,col) = tmp;
         else
         {
@@ -91,8 +100,8 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpperTriangular,RowMajor|IsSparse>
 };
 
 // forward substitution, col-major
-template<typename Lhs, typename Rhs>
-struct ei_solve_triangular_selector<Lhs,Rhs,LowerTriangular,ColMajor|IsSparse>
+template<typename Lhs, typename Rhs, int Mode>
+struct ei_sparse_solve_triangular_selector<Lhs,Rhs,Mode,LowerTriangular,ColMajor>
 {
   typedef typename Rhs::Scalar Scalar;
   static void run(const Lhs& lhs, Rhs& other)
@@ -101,26 +110,28 @@ struct ei_solve_triangular_selector<Lhs,Rhs,LowerTriangular,ColMajor|IsSparse>
     {
       for(int i=0; i<lhs.cols(); ++i)
       {
-        typename Lhs::InnerIterator it(lhs, i);
-        if(!(Lhs::Flags & UnitDiagBit))
+        Scalar& tmp = other.coeffRef(i,col);
+        if (tmp!=Scalar(0)) // optimization when other is actually sparse
         {
-          // std::cerr << it.value() << " ; " << it.index() << " == " << i << "\n";
-          ei_assert(it.index()==i);
-          other.coeffRef(i,col) /= it.value();
+          typename Lhs::InnerIterator it(lhs, i);
+          if(!(Mode & UnitDiagBit))
+          {
+            ei_assert(it.index()==i);
+            tmp /= it.value();
+          }
+          if (it.index()==i)
+            ++it;
+          for(; it; ++it)
+            other.coeffRef(it.index(), col) -= tmp * it.value();
         }
-        Scalar tmp = other.coeffRef(i,col);
-        if (it.index()==i)
-          ++it;
-        for(; it; ++it)
-          other.coeffRef(it.index(), col) -= tmp * it.value();
       }
     }
   }
 };
 
 // backward substitution, col-major
-template<typename Lhs, typename Rhs>
-struct ei_solve_triangular_selector<Lhs,Rhs,UpperTriangular,ColMajor|IsSparse>
+template<typename Lhs, typename Rhs, int Mode>
+struct ei_sparse_solve_triangular_selector<Lhs,Rhs,Mode,UpperTriangular,ColMajor>
 {
   typedef typename Rhs::Scalar Scalar;
   static void run(const Lhs& lhs, Rhs& other)
@@ -129,29 +140,32 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpperTriangular,ColMajor|IsSparse>
     {
       for(int i=lhs.cols()-1; i>=0; --i)
       {
-        if(!(Lhs::Flags & UnitDiagBit))
+        Scalar& tmp = other.coeffRef(i,col);
+        if (tmp!=Scalar(0)) // optimization when other is actually sparse
         {
-          // FIXME lhs.coeff(i,i) might not be always efficient while it must simply be the
-          // last element of the column !
-          other.coeffRef(i,col) /= lhs.coeff(i,i);
+          if(!(Mode & UnitDiagBit))
+          {
+            // FIXME lhs.coeff(i,i) might not be always efficient while it must simply be the
+            // last element of the column !
+            other.coeffRef(i,col) /= lhs.innerVector(i).lastCoeff();
+          }
+          typename Lhs::InnerIterator it(lhs, i);
+          for(; it && it.index()<i; ++it)
+            other.coeffRef(it.index(), col) -= tmp * it.value();
         }
-        Scalar tmp = other.coeffRef(i,col);
-        typename Lhs::InnerIterator it(lhs, i);
-        for(; it && it.index()<i; ++it)
-          other.coeffRef(it.index(), col) -= tmp * it.value();
       }
     }
   }
 };
 
-template<typename Derived>
+template<typename ExpressionType,int Mode>
 template<typename OtherDerived>
-void SparseMatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>& other) const
+void SparseTriangular<ExpressionType,Mode>::solveInPlace(MatrixBase<OtherDerived>& other) const
 {
-  ei_assert(derived().cols() == derived().rows());
-  ei_assert(derived().cols() == other.rows());
-  ei_assert(!(Flags & ZeroDiagBit));
-  ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit));
+  ei_assert(m_matrix.cols() == m_matrix.rows());
+  ei_assert(m_matrix.cols() == other.rows());
+  ei_assert(!(Mode & ZeroDiagBit));
+  ei_assert(Mode & (UpperTriangularBit|LowerTriangularBit));
 
   enum { copy = ei_traits<OtherDerived>::Flags & RowMajorBit };
 
@@ -159,19 +173,151 @@ void SparseMatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>&
     typename ei_plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::ret OtherCopy;
   OtherCopy otherCopy(other.derived());
 
-  ei_solve_triangular_selector<Derived, typename ei_unref<OtherCopy>::type>::run(derived(), otherCopy);
+  ei_sparse_solve_triangular_selector<ExpressionType, typename ei_unref<OtherCopy>::type, Mode>::run(m_matrix, otherCopy);
 
   if (copy)
     other = otherCopy;
 }
 
+template<typename ExpressionType,int Mode>
+template<typename OtherDerived>
+typename ei_plain_matrix_type_column_major<OtherDerived>::type
+SparseTriangular<ExpressionType,Mode>::solve(const MatrixBase<OtherDerived>& other) const
+{
+  typename ei_plain_matrix_type_column_major<OtherDerived>::type res(other);
+  solveInPlace(res);
+  return res;
+}
+
+// pure sparse path
+
+template<typename Lhs, typename Rhs, int Mode,
+  int UpLo = (Mode & LowerTriangularBit)
+           ? LowerTriangular
+           : (Mode & UpperTriangularBit)
+           ? UpperTriangular
+           : -1,
+  int StorageOrder = int(Lhs::Flags) & (RowMajorBit)>
+struct ei_sparse_solve_triangular_sparse_selector;
+
+// forward substitution, col-major
+template<typename Lhs, typename Rhs, int Mode, int UpLo>
+struct ei_sparse_solve_triangular_sparse_selector<Lhs,Rhs,Mode,UpLo,ColMajor>
+{
+  typedef typename Rhs::Scalar Scalar;
+  static void run(const Lhs& lhs, Rhs& other)
+  {
+    const bool IsLowerTriangular = (UpLo==LowerTriangular);
+    AmbiVector<Scalar> tempVector(other.rows()*2);
+    tempVector.setBounds(0,other.rows());
+    
+    Rhs res(other.rows(), other.cols());
+    res.startFill(other.nonZeros());
+    
+    for(int col=0 ; col<other.cols() ; ++col)
+    {
+      // FIXME estimate number of non zeros
+      tempVector.init(.99/*float(other.col(col).nonZeros())/float(other.rows())*/);
+      tempVector.setZero();
+      tempVector.restart();
+      for (typename Rhs::InnerIterator rhsIt(other, col); rhsIt; ++rhsIt)
+      {
+        tempVector.coeffRef(rhsIt.index()) = rhsIt.value();
+      }
+      
+      for(int i=IsLowerTriangular?0:lhs.cols()-1;
+          IsLowerTriangular?i<lhs.cols():i>=0;
+          i+=IsLowerTriangular?1:-1)
+      {
+        tempVector.restart();
+        Scalar& ci = tempVector.coeffRef(i);
+        if (ci!=Scalar(0))
+        {
+          // find 
+          typename Lhs::InnerIterator it(lhs, i);
+          if(!(Mode & UnitDiagBit))
+          {
+            if (IsLowerTriangular)
+            {
+              ei_assert(it.index()==i);
+              ci /= it.value();
+            }
+            else
+              ci /= lhs.coeff(i,i);
+          }
+          tempVector.restart();
+          if (IsLowerTriangular)
+          {
+            if (it.index()==i)
+              ++it;
+            for(; it; ++it)
+              tempVector.coeffRef(it.index()) -= ci * it.value();
+          }
+          else
+          {
+            for(; it && it.index()<i; ++it)
+              tempVector.coeffRef(it.index()) -= ci * it.value();
+          }
+        }
+      }
+      
+      
+      int count = 0;
+      // FIXME compute a reference value to filter zeros
+      for (typename AmbiVector<Scalar>::Iterator it(tempVector/*,1e-12*/); it; ++it)
+      {
+        ++ count;
+//         std::cerr << "fill " << it.index() << ", " << col << "\n";
+//         std::cout << it.value() << "  ";
+        res.fill(it.index(), col) = it.value();
+      }
+//       std::cout << "tempVector.nonZeros() == " << int(count) << " / " << (other.rows()) << "\n";
+    }
+    res.endFill();
+    other = res.markAsRValue();
+  }
+};
+
+template<typename ExpressionType,int Mode>
+template<typename OtherDerived>
+void SparseTriangular<ExpressionType,Mode>::solveInPlace(SparseMatrixBase<OtherDerived>& other) const
+{
+  ei_assert(m_matrix.cols() == m_matrix.rows());
+  ei_assert(m_matrix.cols() == other.rows());
+  ei_assert(!(Mode & ZeroDiagBit));
+  ei_assert(Mode & (UpperTriangularBit|LowerTriangularBit));
+
+//   enum { copy = ei_traits<OtherDerived>::Flags & RowMajorBit };
+
+//   typedef typename ei_meta_if<copy,
+//     typename ei_plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::ret OtherCopy;
+//   OtherCopy otherCopy(other.derived());
+
+  ei_sparse_solve_triangular_sparse_selector<ExpressionType, OtherDerived, Mode>::run(m_matrix, other.derived());
+
+//   if (copy)
+//     other = otherCopy;
+}
+
+
+// deprecated stuff:
+
+/** \deprecated */
+template<typename Derived>
+template<typename OtherDerived>
+void SparseMatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>& other) const
+{
+  this->template triangular<Flags&(UpperTriangularBit|LowerTriangularBit)>().solveInPlace(other);
+}
+
+/** \deprecated */
 template<typename Derived>
 template<typename OtherDerived>
 typename ei_plain_matrix_type_column_major<OtherDerived>::type
 SparseMatrixBase<Derived>::solveTriangular(const MatrixBase<OtherDerived>& other) const
 {
   typename ei_plain_matrix_type_column_major<OtherDerived>::type res(other);
-  solveTriangularInPlace(res);
+  derived().solveTriangularInPlace(res);
   return res;
 }
 
diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp
index 3a0356f00..00b5a3ed5 100644
--- a/test/sparse_product.cpp
+++ b/test/sparse_product.cpp
@@ -40,6 +40,7 @@ template<typename SparseMatrixType> void sparse_product(const SparseMatrixType&
     DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
     DenseMatrix refMat3 = DenseMatrix::Zero(rows, rows);
     DenseMatrix refMat4 = DenseMatrix::Zero(rows, rows);
+    DenseMatrix refMat5 = DenseMatrix::Random(rows, rows);
     DenseMatrix dm4 = DenseMatrix::Zero(rows, rows);
     SparseMatrixType m2(rows, rows);
     SparseMatrixType m3(rows, rows);
@@ -57,7 +58,10 @@ template<typename SparseMatrixType> void sparse_product(const SparseMatrixType&
     VERIFY_IS_APPROX(dm4=m2*refMat3.transpose(), refMat4=refMat2*refMat3.transpose());
     VERIFY_IS_APPROX(dm4=m2.transpose()*refMat3, refMat4=refMat2.transpose()*refMat3);
     VERIFY_IS_APPROX(dm4=m2.transpose()*refMat3.transpose(), refMat4=refMat2.transpose()*refMat3.transpose());
-
+    
+    VERIFY_IS_APPROX(dm4=m2*(refMat3+refMat3), refMat4=refMat2*(refMat3+refMat3));
+    VERIFY_IS_APPROX(dm4=m2.transpose()*(refMat3+refMat5)*0.5, refMat4=refMat2.transpose()*(refMat3+refMat5)*0.5);
+    
     // dense * sparse
     VERIFY_IS_APPROX(dm4=refMat2*m3, refMat4=refMat2*refMat3);
     VERIFY_IS_APPROX(dm4=refMat2*m3.transpose(), refMat4=refMat2*refMat3.transpose());
diff --git a/test/sparse_solvers.cpp b/test/sparse_solvers.cpp
index 8ce4e2264..d1090dfed 100644
--- a/test/sparse_solvers.cpp
+++ b/test/sparse_solvers.cpp
@@ -63,17 +63,39 @@ template<typename Scalar> void sparse_solvers(int rows, int cols)
     SparseMatrix<Scalar> m2(rows, cols);
     DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
 
-    // lower
+    // lower - dense
+    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords);
+    VERIFY_IS_APPROX(refMat2.template marked<LowerTriangular>().solveTriangular(vec2),
+                     m2.template triangular<LowerTriangular>().solve(vec3));
+
+    // upper - dense
+    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords);
+    VERIFY_IS_APPROX(refMat2.template marked<UpperTriangular>().solveTriangular(vec2),
+                     m2.template triangular<UpperTriangular>().solve(vec3));
+    
+    // TODO test row major
+    
+    SparseMatrix<Scalar> matB(rows, rows);
+    DenseMatrix refMatB = DenseMatrix::Zero(rows, rows);
+    
+    // lower - sparse
+    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular);
+    initSparse<Scalar>(density, refMatB, matB);
+    refMat2.template marked<LowerTriangular>().solveTriangularInPlace(refMatB);
+    m2.template triangular<LowerTriangular>().solveInPlace(matB);
+    VERIFY_IS_APPROX(matB.toDense(), refMatB);
+
+    // upper - sparse
+    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular);
+    initSparse<Scalar>(density, refMatB, matB);
+    refMat2.template marked<UpperTriangular>().solveTriangularInPlace(refMatB);
+    m2.template triangular<UpperTriangular>().solveInPlace(matB);
+    VERIFY_IS_APPROX(matB, refMatB);
+    
+    // test deprecated API
     initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords);
     VERIFY_IS_APPROX(refMat2.template marked<LowerTriangular>().solveTriangular(vec2),
                      m2.template marked<LowerTriangular>().solveTriangular(vec3));
-
-    // upper
-    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords);
-    VERIFY_IS_APPROX(refMat2.template marked<UpperTriangular>().solveTriangular(vec2),
-                     m2.template marked<UpperTriangular>().solveTriangular(vec3));
-
-    // TODO test row major
   }
 
   // test LLT