Sparse module:

* several fixes (transpose, matrix product, etc...) * Added a basic cholesky factorization * Added a low level hybrid dense/sparse vector class to help writing code involving intensive read/write in a fixed vector. It is currently used to implement the matrix product itself as well as in the Cholesky factorization.
2025-09-30 02:03:14 +08:00 · 2008-10-04 14:23:00 +00:00 · 2008-10-04 14:23:00 +00:00 · 068ff3370d
commit 068ff3370d
parent 1fc503e3ce
8 changed files with 859 additions and 103 deletions
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@ -13,6 +13,7 @@ namespace Eigen {
 #include "src/Sparse/SparseUtil.h"
 #include "src/Sparse/SparseMatrixBase.h"
 #include "src/Sparse/SparseArray.h"
+#include "src/Sparse/AmbiVector.h"
 #include "src/Sparse/SparseBlock.h"
 #include "src/Sparse/SparseMatrix.h"
 #include "src/Sparse/HashMatrix.h"
@ -21,6 +22,7 @@ namespace Eigen {
 #include "src/Sparse/SparseSetter.h"
 #include "src/Sparse/SparseProduct.h"
 #include "src/Sparse/TriangularSolver.h"
+#include "src/Sparse/BasicSparseCholesky.h"

 } // namespace Eigen

--- a/Eigen/src/Cholesky/Cholesky.h
+++ b/Eigen/src/Cholesky/Cholesky.h
@ -59,7 +59,7 @@ template<typename MatrixType> class Cholesky
    };

  public:
-  
+
    Cholesky(const MatrixType& matrix)
      : m_matrix(matrix.rows(), matrix.cols())
    {
--- a/Eigen/src/Sparse/AmbiVector.h
+++ b/Eigen/src/Sparse/AmbiVector.h
@ -0,0 +1,348 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_AMBIVECTOR_H
+#define EIGEN_AMBIVECTOR_H
+
+/** \internal
+  * Hybrid sparse/dense vector class designed for intensive read-write operations.
+  *
+  * See BasicSparseCholesky and SparseProduct for usage examples.
+  */
+template<typename _Scalar> class AmbiVector
+{
+  public:
+    typedef _Scalar Scalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    AmbiVector(int size)
+      : m_buffer(0), m_size(0), m_allocatedSize(0), m_mode(-1)
+    {
+      resize(size);
+    }
+
+    void init(RealScalar estimatedDensity);
+    void init(int mode);
+
+    void nonZeros() const;
+
+    /** Specifies a sub-vector to work on */
+    void setBounds(int start, int end) { m_start = start; m_end = end; }
+
+    void setZero();
+
+    void restart();
+    Scalar& coeffRef(int i);
+    Scalar coeff(int i);
+
+    class Iterator;
+
+    ~AmbiVector() { delete[] m_buffer; }
+
+    void resize(int size)
+    {
+      if (m_allocatedSize < size)
+        reallocate(size);
+      m_size = size;
+    }
+
+    int size() const { return m_size; }
+
+  protected:
+
+    void reallocate(int size)
+    {
+      Scalar* newBuffer = new Scalar[size/* *4 + (size * sizeof(int)*2)/sizeof(Scalar)+1 */];
+      int copySize = std::min(size, m_size);
+      memcpy(newBuffer,  m_buffer,  copySize * sizeof(Scalar));
+      delete[] m_buffer;
+      m_buffer = newBuffer;
+      m_allocatedSize = size;
+
+      m_size = size;
+      m_start = 0;
+      m_end = m_size;
+    }
+
+  protected:
+    // element type of the linked list
+    struct ListEl
+    {
+      int next;
+      int index;
+      Scalar value;
+    };
+
+    // used to store data in both mode
+    Scalar* m_buffer;
+    int m_size;
+    int m_start;
+    int m_end;
+    int m_allocatedSize;
+    int m_mode;
+
+    // linked list mode
+    int m_llStart;
+    int m_llCurrent;
+    int m_llSize;
+
+  private:
+    AmbiVector(const AmbiVector&);
+
+};
+
+/** \returns the number of non zeros in the current sub vector */
+template<typename Scalar>
+void AmbiVector<Scalar>::nonZeros() const
+{
+  if (m_mode==IsSparse)
+    return m_llSize;
+  else
+    return m_end - m_start;
+}
+
+template<typename Scalar>
+void AmbiVector<Scalar>::init(RealScalar estimatedDensity)
+{
+  if (m_mode = estimatedDensity>0.1)
+    init(IsDense);
+  else
+    init(IsSparse);
+}
+
+template<typename Scalar>
+void AmbiVector<Scalar>::init(int mode)
+{
+  m_mode = mode;
+  if (m_mode==IsSparse)
+  {
+    m_llSize = 0;
+    m_llStart = -1;
+  }
+}
+
+/** Must be called whenever we might perform a write access
+  * with an index smaller than the previous one.
+  *
+  * Don't worry, this function is extremely cheap.
+  */
+template<typename Scalar>
+void AmbiVector<Scalar>::restart()
+{
+  m_llCurrent = m_llStart;
+}
+
+/** Set all coefficients of current subvector to zero */
+template<typename Scalar>
+void AmbiVector<Scalar>::setZero()
+{
+  if (m_mode==IsDense)
+  {
+    for (int i=m_start; i<m_end; ++i)
+      m_buffer[i] = Scalar(0);
+  }
+  else
+  {
+    ei_assert(m_mode==IsSparse);
+    m_llSize = 0;
+    m_llStart = -1;
+  }
+}
+
+template<typename Scalar>
+Scalar& AmbiVector<Scalar>::coeffRef(int i)
+{
+  if (m_mode==IsDense)
+    return m_buffer[i];
+  else
+  {
+    ListEl* EIGEN_RESTRICT llElements = reinterpret_cast<ListEl*>(m_buffer);
+    // TODO factorize the following code to reduce code generation
+    ei_assert(m_mode==IsSparse);
+    if (m_llSize==0)
+    {
+      // this is the first element
+      m_llStart = 0;
+      m_llCurrent = 0;
+      m_llSize++;
+      llElements[0].value = Scalar(0);
+      llElements[0].index = i;
+      llElements[0].next = -1;
+      return llElements[0].value;
+    }
+    else if (i<llElements[m_llStart].index)
+    {
+      // this is going to be the new first element of the list
+      ListEl& el = llElements[m_llSize];
+      el.value = Scalar(0);
+      el.index = i;
+      el.next = m_llStart;
+      m_llStart = m_llSize;
+      m_llSize++;
+      m_llCurrent = m_llStart;
+      return el.value;
+    }
+    else
+    {
+      int nextel = llElements[m_llCurrent].next;
+      ei_assert(i>=llElements[m_llCurrent].index && "you must call restart() before inserting an element with lower or equal index");
+      while (nextel >= 0 && llElements[nextel].index<=i)
+      {
+        m_llCurrent = nextel;
+        nextel = llElements[nextel].next;
+      }
+
+      if (llElements[m_llCurrent].index==i)
+      {
+        // the coefficient already exists and we found it !
+        return llElements[m_llCurrent].value;
+      }
+      else
+      {
+        // let's insert a new coefficient
+        ListEl& el = llElements[m_llSize];
+        el.value = Scalar(0);
+        el.index = i;
+        el.next = llElements[m_llCurrent].next;
+        llElements[m_llCurrent].next = m_llSize;
+        m_llSize++;
+        return el.value;
+      }
+    }
+  }
+}
+
+template<typename Scalar>
+Scalar AmbiVector<Scalar>::coeff(int i)
+{
+  if (m_mode==IsDense)
+    return m_buffer[i];
+  else
+  {
+    ListEl* EIGEN_RESTRICT llElements = reinterpret_cast<ListEl*>(m_buffer);
+    ei_assert(m_mode==IsSparse);
+    if ((m_llSize==0) || (i<llElements[m_llStart].index))
+    {
+      return Scalar(0);
+    }
+    else
+    {
+      int elid = m_llStart;
+      while (elid >= 0 && llElements[elid].index<i)
+        elid = llElements[elid].next;
+
+      if (llElements[elid].index==i)
+        return llElements[m_llCurrent].value;
+      else
+        return Scalar(0);
+    }
+  }
+}
+
+/** Iterator over the nonzero coefficients */
+template<typename _Scalar>
+class AmbiVector<_Scalar>::Iterator
+{
+  public:
+    typedef _Scalar Scalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+
+    /** Default constructor
+      * \param vec the vector on which we iterate
+      * \param nonZeroReferenceValue reference value used to prune zero coefficients.
+      * In practice, the coefficient are compared to \a nonZeroReferenceValue * precision<Scalar>().
+      */
+    Iterator(const AmbiVector& vec, RealScalar nonZeroReferenceValue = RealScalar(0.1)) : m_vector(vec)
+    {
+      m_epsilon = nonZeroReferenceValue * precision<Scalar>();
+      m_isDense = m_vector.m_mode==IsDense;
+      if (m_isDense)
+      {
+        m_cachedIndex = m_vector.m_start-1;
+        ++(*this);
+      }
+      else
+      {
+        ListEl* EIGEN_RESTRICT llElements = reinterpret_cast<ListEl*>(m_vector.m_buffer);
+        m_currentEl = m_vector.m_llStart;
+        while (m_currentEl>=0 && ei_abs(llElements[m_currentEl].value)<m_epsilon)
+          m_currentEl = llElements[m_currentEl].next;
+        if (m_currentEl<0)
+        {
+          m_cachedIndex = -1;
+        }
+        else
+        {
+          m_cachedIndex = llElements[m_currentEl].index;
+          m_cachedValue = llElements[m_currentEl].value;
+        }
+      }
+    }
+
+    int index() const { return m_cachedIndex; }
+    Scalar value() const { return m_cachedValue; }
+
+    operator bool() const { return m_cachedIndex>=0; }
+
+    Iterator& operator++()
+    {
+      if (m_isDense)
+      {
+        do {
+          m_cachedIndex++;
+        } while (m_cachedIndex<m_vector.m_end && ei_abs(m_vector.m_buffer[m_cachedIndex])<m_epsilon);
+        if (m_cachedIndex<m_vector.m_end)
+          m_cachedValue = m_vector.m_buffer[m_cachedIndex];
+        else
+          m_cachedIndex=-1;
+      }
+      else
+      {
+        ListEl* EIGEN_RESTRICT llElements = reinterpret_cast<ListEl*>(m_vector.m_buffer);
+        do {
+          m_currentEl = llElements[m_currentEl].next;
+        } while (m_currentEl>=0 && ei_abs(llElements[m_currentEl].value)<m_epsilon);
+        if (m_currentEl<0)
+        {
+          m_cachedIndex = -1;
+        }
+        else
+        {
+          m_cachedIndex = llElements[m_currentEl].index;
+          m_cachedValue = llElements[m_currentEl].value;
+        }
+      }
+      return *this;
+    }
+
+  protected:
+    const AmbiVector& m_vector; // the target vector
+    int m_currentEl;            // the current element in sparse/linked-list mode
+    RealScalar m_epsilon;       // epsilon used to prune zero coefficients
+    int m_cachedIndex;          // current coordinate
+    Scalar m_cachedValue;       // current value
+    bool m_isDense;             // mode of the vector
+};
+
+
+#endif // EIGEN_AMBIVECTOR_H
--- a/Eigen/src/Sparse/BasicSparseCholesky.h
+++ b/Eigen/src/Sparse/BasicSparseCholesky.h
@ -0,0 +1,440 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_BASICSPARSECHOLESKY_H
+#define EIGEN_BASICSPARSECHOLESKY_H
+
+/** \ingroup Sparse_Module
+  *
+  * \class BasicSparseCholesky
+  *
+  * \brief Standard Cholesky decomposition of a matrix and associated features
+  *
+  * \param MatrixType the type of the matrix of which we are computing the Cholesky decomposition
+  *
+  * \sa class Cholesky, class CholeskyWithoutSquareRoot
+  */
+template<typename MatrixType> class BasicSparseCholesky
+{
+  private:
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+    typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
+
+    enum {
+      PacketSize = ei_packet_traits<Scalar>::size,
+      AlignmentMask = int(PacketSize)-1
+    };
+
+  public:
+
+    BasicSparseCholesky(const MatrixType& matrix)
+      : m_matrix(matrix.rows(), matrix.cols())
+    {
+      compute(matrix);
+    }
+
+    inline const MatrixType& matrixL(void) const { return m_matrix; }
+
+    /** \returns true if the matrix is positive definite */
+    inline bool isPositiveDefinite(void) const { return m_isPositiveDefinite; }
+
+//     template<typename Derived>
+//     typename Derived::Eval solve(const MatrixBase<Derived> &b) const;
+
+    void compute(const MatrixType& matrix);
+
+  protected:
+    /** \internal
+      * Used to compute and store L
+      * The strict upper part is not used and even not initialized.
+      */
+    MatrixType m_matrix;
+    bool m_isPositiveDefinite;
+
+    struct ListEl
+    {
+      int next;
+      int index;
+      Scalar value;
+    };
+};
+
+/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
+  */
+#ifdef IGEN_BASICSPARSECHOLESKY_H
+template<typename MatrixType>
+void BasicSparseCholesky<MatrixType>::compute(const MatrixType& a)
+{
+  assert(a.rows()==a.cols());
+  const int size = a.rows();
+  m_matrix.resize(size, size);
+  const RealScalar eps = ei_sqrt(precision<Scalar>());
+
+  // allocate a temporary vector for accumulations
+  AmbiVector<Scalar> tempVector(size);
+
+  // TODO estimate the number of nnz
+  m_matrix.startFill(a.nonZeros()*2);
+  for (int j = 0; j < size; ++j)
+  {
+//     std::cout << j << "\n";
+    Scalar x = ei_real(a.coeff(j,j));
+    int endSize = size-j-1;
+
+    // TODO estimate the number of non zero entries
+//       float ratioLhs = float(lhs.nonZeros())/float(lhs.rows()*lhs.cols());
+//       float avgNnzPerRhsColumn = float(rhs.nonZeros())/float(cols);
+//       float ratioRes = std::min(ratioLhs * avgNnzPerRhsColumn, 1.f);
+
+        // let's do a more accurate determination of the nnz ratio for the current column j of res
+        //float ratioColRes = std::min(ratioLhs * rhs.innerNonZeros(j), 1.f);
+        // FIXME find a nice way to get the number of nonzeros of a sub matrix (here an inner vector)
+//         float ratioColRes = ratioRes;
+//         if (ratioColRes>0.1)
+//     tempVector.init(IsSparse);
+    tempVector.init(IsDense);
+    tempVector.setBounds(j+1,size);
+    tempVector.setZero();
+    // init with current matrix a
+    {
+      typename MatrixType::InnerIterator it(a,j);
+      ++it; // skip diagonal element
+      for (; it; ++it)
+        tempVector.coeffRef(it.index()) = it.value();
+    }
+    for (int k=0; k<j+1; ++k)
+    {
+      typename MatrixType::InnerIterator it(m_matrix, k);
+      while (it && it.index()<j)
+        ++it;
+      if (it && it.index()==j)
+      {
+        Scalar y = it.value();
+        x -= ei_abs2(y);
+        ++it; // skip j-th element, and process remaing column coefficients
+        tempVector.restart();
+        for (; it; ++it)
+        {
+          tempVector.coeffRef(it.index()) -= it.value() * y;
+        }
+      }
+    }
+    // copy the temporary vector to the respective m_matrix.col()
+    // while scaling the result by 1/real(x)
+    RealScalar rx = ei_sqrt(ei_real(x));
+    m_matrix.fill(j,j) = rx;
+    Scalar y = Scalar(1)/rx;
+    for (typename AmbiVector<Scalar>::Iterator it(tempVector); it; ++it)
+    {
+      m_matrix.fill(it.index(), j) = it.value() * y;
+    }
+  }
+  m_matrix.endFill();
+}
+
+
+#else
+
+template<typename MatrixType>
+void BasicSparseCholesky<MatrixType>::compute(const MatrixType& a)
+{
+  assert(a.rows()==a.cols());
+  const int size = a.rows();
+  m_matrix.resize(size, size);
+  const RealScalar eps = ei_sqrt(precision<Scalar>());
+
+  // allocate a temporary buffer
+  Scalar* buffer = new Scalar[size*2];
+
+
+  m_matrix.startFill(a.nonZeros()*2);
+
+//   RealScalar x;
+//   x = ei_real(a.coeff(0,0));
+//   m_isPositiveDefinite = x > eps && ei_isMuchSmallerThan(ei_imag(a.coeff(0,0)), RealScalar(1));
+//   m_matrix.fill(0,0) = ei_sqrt(x);
+//   m_matrix.col(0).end(size-1) = a.row(0).end(size-1).adjoint() / ei_real(m_matrix.coeff(0,0));
+  for (int j = 0; j < size; ++j)
+  {
+//     std::cout << j << " " << std::flush;
+//     Scalar tmp = ei_real(a.coeff(j,j));
+//     if (j>0)
+//       tmp -= m_matrix.row(j).start(j).norm2();
+//     x = ei_real(tmp);
+//     std::cout << "x = " << x << "\n";
+//     if (x < eps || (!ei_isMuchSmallerThan(ei_imag(tmp), RealScalar(1))))
+//     {
+//       m_isPositiveDefinite = false;
+//       return;
+//     }
+//     m_matrix.fill(j,j) = x = ei_sqrt(x);
+
+    Scalar x = ei_real(a.coeff(j,j));
+//     if (j>0)
+//       x -= m_matrix.row(j).start(j).norm2();
+//     RealScalar rx = ei_sqrt(ei_real(x));
+//     m_matrix.fill(j,j) = rx;
+    int endSize = size-j-1;
+    /*if (endSize>0)*/ {
+      // Note that when all matrix columns have good alignment, then the following
+      // product is guaranteed to be optimal with respect to alignment.
+//       m_matrix.col(j).end(endSize) =
+//         (m_matrix.block(j+1, 0, endSize, j) * m_matrix.row(j).start(j).adjoint()).lazy();
+
+      // FIXME could use a.col instead of a.row
+//       m_matrix.col(j).end(endSize) = (a.row(j).end(endSize).adjoint()
+//         - m_matrix.col(j).end(endSize) ) / x;
+
+      // make sure to call innerSize/outerSize since we fake the storage order.
+
+
+
+
+      // estimate the number of non zero entries
+//       float ratioLhs = float(lhs.nonZeros())/float(lhs.rows()*lhs.cols());
+//       float avgNnzPerRhsColumn = float(rhs.nonZeros())/float(cols);
+//       float ratioRes = std::min(ratioLhs * avgNnzPerRhsColumn, 1.f);
+
+
+//       for (int j1=0; j1<cols; ++j1)
+      {
+        // let's do a more accurate determination of the nnz ratio for the current column j of res
+        //float ratioColRes = std::min(ratioLhs * rhs.innerNonZeros(j), 1.f);
+        // FIXME find a nice way to get the number of nonzeros of a sub matrix (here an inner vector)
+//         float ratioColRes = ratioRes;
+//         if (ratioColRes>0.1)
+        if (true)
+        {
+          // dense path, the scalar * columns products are accumulated into a dense column
+          Scalar* __restrict__ tmp = buffer;
+          // set to zero
+          for (int k=j+1; k<size; ++k)
+            tmp[k] = 0;
+          // init with current matrix a
+          {
+            typename MatrixType::InnerIterator it(a,j);
+            ++it;
+            for (; it; ++it)
+              tmp[it.index()] = it.value();
+          }
+          for (int k=0; k<j+1; ++k)
+          {
+//             Scalar y = m_matrix.coeff(j,k);
+//             if (!ei_isMuchSmallerThan(ei_abs(y),Scalar(1)))
+//             {
+            typename MatrixType::InnerIterator it(m_matrix, k);
+            while (it && it.index()<j)
+              ++it;
+            if (it && it.index()==j)
+            {
+              Scalar y = it.value();
+              x -= ei_abs2(y);
+//               if (!ei_isMuchSmallerThan(ei_abs(y),Scalar(0.1)))
+              {
+                ++it;
+                for (; it; ++it)
+                {
+                  tmp[it.index()] -= it.value() * y;
+                }
+              }
+            }
+          }
+          // copy the temporary to the respective m_matrix.col()
+          RealScalar rx = ei_sqrt(ei_real(x));
+          m_matrix.fill(j,j) = rx;
+          Scalar y = Scalar(1)/rx;
+          for (int k=j+1; k<size; ++k)
+            //if (tmp[k]!=0)
+            if (!ei_isMuchSmallerThan(ei_abs(tmp[k]),Scalar(0.01)))
+              m_matrix.fill(k, j) = tmp[k]*y;
+        }
+        else
+        {
+          ListEl* __restrict__ tmp = reinterpret_cast<ListEl*>(buffer);
+          // sparse path, the scalar * columns products are accumulated into a linked list
+          int tmp_size = 0;
+          int tmp_start = -1;
+
+          {
+            int tmp_el = tmp_start;
+            typename MatrixType::InnerIterator it(a,j);
+            if (it)
+            {
+              ++it;
+              for (; it; ++it)
+              {
+                Scalar v = it.value();
+                int id = it.index();
+                if (tmp_size==0)
+                {
+                  tmp_start = 0;
+                  tmp_el = 0;
+                  tmp_size++;
+                  tmp[0].value = v;
+                  tmp[0].index = id;
+                  tmp[0].next = -1;
+                }
+                else if (id<tmp[tmp_start].index)
+                {
+                  tmp[tmp_size].value = v;
+                  tmp[tmp_size].index = id;
+                  tmp[tmp_size].next = tmp_start;
+                  tmp_start = tmp_size;
+                  tmp_el = tmp_start;
+                  tmp_size++;
+                }
+                else
+                {
+                  int nextel = tmp[tmp_el].next;
+                  while (nextel >= 0 && tmp[nextel].index<=id)
+                  {
+                    tmp_el = nextel;
+                    nextel = tmp[nextel].next;
+                  }
+
+                  if (tmp[tmp_el].index==id)
+                  {
+                    tmp[tmp_el].value = v;
+                  }
+                  else
+                  {
+                    tmp[tmp_size].value = v;
+                    tmp[tmp_size].index = id;
+                    tmp[tmp_size].next = tmp[tmp_el].next;
+                    tmp[tmp_el].next = tmp_size;
+                    tmp_size++;
+                  }
+                }
+              }
+            }
+          }
+//           for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
+          for (int k=0; k<j+1; ++k)
+          {
+//             Scalar y = m_matrix.coeff(j,k);
+//             if (!ei_isMuchSmallerThan(ei_abs(y),Scalar(1)))
+//             {
+            int tmp_el = tmp_start;
+            typename MatrixType::InnerIterator it(m_matrix, k);
+            while (it && it.index()<j)
+              ++it;
+            if (it && it.index()==j)
+            {
+              Scalar y = it.value();
+              x -= ei_abs2(y);
+              for (; it; ++it)
+              {
+                Scalar v = -it.value() * y;
+                int id = it.index();
+                if (tmp_size==0)
+                {
+//                   std::cout << "insert because size==0\n";
+                  tmp_start = 0;
+                  tmp_el = 0;
+                  tmp_size++;
+                  tmp[0].value = v;
+                  tmp[0].index = id;
+                  tmp[0].next = -1;
+                }
+                else if (id<tmp[tmp_start].index)
+                {
+//                   std::cout << "insert because not in (0) " << id << " " << tmp[tmp_start].index << " " << tmp_start << "\n";
+                  tmp[tmp_size].value = v;
+                  tmp[tmp_size].index = id;
+                  tmp[tmp_size].next = tmp_start;
+                  tmp_start = tmp_size;
+                  tmp_el = tmp_start;
+                  tmp_size++;
+                }
+                else
+                {
+                  int nextel = tmp[tmp_el].next;
+                  while (nextel >= 0 && tmp[nextel].index<=id)
+                  {
+                    tmp_el = nextel;
+                    nextel = tmp[nextel].next;
+                  }
+
+                  if (tmp[tmp_el].index==id)
+                  {
+                    tmp[tmp_el].value -= v;
+                  }
+                  else
+                  {
+//                     std::cout << "insert because not in (1)\n";
+                    tmp[tmp_size].value = v;
+                    tmp[tmp_size].index = id;
+                    tmp[tmp_size].next = tmp[tmp_el].next;
+                    tmp[tmp_el].next = tmp_size;
+                    tmp_size++;
+                  }
+                }
+              }
+            }
+          }
+          RealScalar rx = ei_sqrt(ei_real(x));
+          m_matrix.fill(j,j) = rx;
+          Scalar y = Scalar(1)/rx;
+          int k = tmp_start;
+          while (k>=0)
+          {
+            if (!ei_isMuchSmallerThan(ei_abs(tmp[k].value),Scalar(0.01)))
+            {
+//               std::cout << "fill " << tmp[k].index << "," << j << "\n";
+              m_matrix.fill(tmp[k].index, j) = tmp[k].value * y;
+            }
+            k = tmp[k].next;
+          }
+        }
+      }
+
+    }
+  }
+  m_matrix.endFill();
+}
+
+#endif
+
+/** \returns the solution of \f$ A x = b \f$ using the current decomposition of A.
+  * In other words, it returns \f$ A^{-1} b \f$ computing
+  * \f$ {L^{*}}^{-1} L^{-1} b \f$ from right to left.
+  * \param b the column vector \f$ b \f$, which can also be a matrix.
+  *
+  * Example: \include Cholesky_solve.cpp
+  * Output: \verbinclude Cholesky_solve.out
+  *
+  * \sa MatrixBase::cholesky(), CholeskyWithoutSquareRoot::solve()
+  */
+// template<typename MatrixType>
+// template<typename Derived>
+// typename Derived::Eval Cholesky<MatrixType>::solve(const MatrixBase<Derived> &b) const
+// {
+//   const int size = m_matrix.rows();
+//   ei_assert(size==b.rows());
+//
+//   return m_matrix.adjoint().template part<Upper>().solveTriangular(matrixL().solveTriangular(b));
+// }
+
+#endif // EIGEN_BASICSPARSECHOLESKY_H
--- a/Eigen/src/Sparse/LinkedVectorMatrix.h
+++ b/Eigen/src/Sparse/LinkedVectorMatrix.h
@ -149,6 +149,7 @@ class LinkedVectorMatrix
    {
      const int outer = RowMajor ? row : col;
      const int inner = RowMajor ? col : row;
+//       std::cout << " ll fill " << outer << "," << inner << "\n";
      if (m_ends[outer]==0)
      {
        m_data[outer] = m_ends[outer] = new VectorChunk();
@ -171,6 +172,29 @@ class LinkedVectorMatrix

    inline void endFill() { }

+    void printDbg()
+    {
+      for (int j=0; j<m_data.size(); ++j)
+      {
+        VectorChunk* el = m_data[j];
+        while (el)
+        {
+          for (int i=0; i<el->size; ++i)
+            std::cout << j << "," << el->data[i].index << " = " << el->data[i].value << "\n";
+          el = el->next;
+        }
+      }
+      for (int j=0; j<m_data.size(); ++j)
+      {
+        InnerIterator it(*this,j);
+        while (it)
+        {
+          std::cout << j << "," << it.index() << " = " << it.value() << "\n";
+          ++it;
+        }
+      }
+    }
+
    ~LinkedVectorMatrix()
    {
      clear();
@ -267,7 +291,16 @@ class LinkedVectorMatrix<Scalar,_Flags>::InnerIterator
      : m_matrix(mat), m_el(mat.m_data[col]), m_it(0)
    {}

-    InnerIterator& operator++() { if (m_it<m_el->size) m_it++; else {m_el = m_el->next; m_it=0;}; return *this; }
+    InnerIterator& operator++()
+    {
+      m_it++;
+      if (m_it>=m_el->size)
+      {
+        m_el = m_el->next;
+        m_it = 0;
+      }
+      return *this;
+    }

    Scalar value() { return m_el->data[m_it].value; }

--- a/Eigen/src/Sparse/SparseMatrix.h
+++ b/Eigen/src/Sparse/SparseMatrix.h
@ -133,10 +133,10 @@ class SparseMatrix
    {
      const int outer = RowMajor ? row : col;
      const int inner = RowMajor ? col : row;
-
+//       std::cout << " fill " << outer << "," << inner << "\n";
      if (m_outerIndex[outer+1]==0)
      {
-        int i=col;
+        int i = outer;
        while (i>=0 && m_outerIndex[i]==0)
        {
          m_outerIndex[i] = m_data.size();
@ -204,6 +204,7 @@ class SparseMatrix

    inline SparseMatrix& operator=(const SparseMatrix& other)
    {
+//       std::cout << "SparseMatrix& operator=(const SparseMatrix& other)\n";
      if (other.isRValue())
      {
        swap(other.const_cast_derived());
@ -221,6 +222,7 @@ class SparseMatrix
    template<typename OtherDerived>
    inline SparseMatrix& operator=(const MatrixBase<OtherDerived>& other)
    {
+//       std::cout << "SparseMatrix& operator=(const MatrixBase<OtherDerived>& other)\n";
      return SparseMatrixBase<SparseMatrix>::operator=(other.derived());
    }

--- a/Eigen/src/Sparse/SparseMatrixBase.h
+++ b/Eigen/src/Sparse/SparseMatrixBase.h
@ -51,6 +51,7 @@ class SparseMatrixBase : public MatrixBase<Derived>

    inline Derived& operator=(const Derived& other)
    {
+//       std::cout << "Derived& operator=(const Derived& other)\n";
      if (other.isRValue())
        derived().swap(other.const_cast_derived());
      else
@ -61,7 +62,9 @@ class SparseMatrixBase : public MatrixBase<Derived>
    template<typename OtherDerived>
    inline Derived& operator=(const MatrixBase<OtherDerived>& other)
    {
+//       std::cout << "Derived& operator=(const MatrixBase<OtherDerived>& other)\n";
      const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+//       std::cout << "eval transpose = " << transpose << "\n";
      const int outerSize = other.outerSize();
      typedef typename ei_meta_if<transpose, LinkedVectorMatrix<Scalar,Flags&RowMajorBit>, Derived>::ret TempType;
      TempType temp(other.rows(), other.cols());
@ -88,6 +91,8 @@ class SparseMatrixBase : public MatrixBase<Derived>
    template<typename OtherDerived>
    inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other)
    {
+//       std::cout << typeid(OtherDerived).name() << "\n";
+//       std::cout << Flags << " " << OtherDerived::Flags << "\n";
      const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
 //       std::cout << "eval transpose = " << transpose << "\n";
      const int outerSize = (int(OtherDerived::Flags) & RowMajorBit) ? other.rows() : other.cols();
--- a/Eigen/src/Sparse/SparseProduct.h
+++ b/Eigen/src/Sparse/SparseProduct.h
@ -41,22 +41,22 @@ struct ProductReturnType<Lhs,Rhs,SparseProduct>
  // type of the temporary to perform the transpose op
  typedef typename ei_meta_if<TransposeLhs,
    SparseMatrix<Scalar,0>,
-    typename ei_nested<Lhs,Rhs::RowsAtCompileTime>::type>::ret LhsNested;
+    const typename ei_nested<Lhs,Rhs::RowsAtCompileTime>::type>::ret LhsNested;

  typedef typename ei_meta_if<TransposeRhs,
    SparseMatrix<Scalar,0>,
-    typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
+    const typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;

-  typedef Product<typename ei_unconst<LhsNested>::type,
-                  typename ei_unconst<RhsNested>::type, SparseProduct> Type;
+  typedef Product<LhsNested,
+                  RhsNested, SparseProduct> Type;
 };

 template<typename LhsNested, typename RhsNested>
 struct ei_traits<Product<LhsNested, RhsNested, SparseProduct> >
 {
  // clean the nested types:
-  typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
-  typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
+  typedef typename ei_cleantype<LhsNested>::type _LhsNested;
+  typedef typename ei_cleantype<RhsNested>::type _RhsNested;
  typedef typename _LhsNested::Scalar Scalar;

  enum {
@ -118,8 +118,8 @@ template<typename LhsNested, typename RhsNested> class Product<LhsNested,RhsNest
    const _LhsNested& rhs() const { return m_rhs; }

  protected:
-    const LhsNested m_lhs;
-    const RhsNested m_rhs;
+    LhsNested m_lhs;
+    RhsNested m_rhs;
 };

 template<typename Lhs, typename Rhs, typename ResultType,
@ -133,23 +133,16 @@ struct ei_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
 {
  typedef typename ei_traits<typename ei_cleantype<Lhs>::type>::Scalar Scalar;

-  struct ListEl
-  {
-    int next;
-    int index;
-    Scalar value;
-  };
-
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    // make sure to call innerSize/outerSize since we fake the storage order.
    int rows = lhs.innerSize();
    int cols = rhs.outerSize();
    int size = lhs.outerSize();
-    ei_assert(size == rhs.rows());
+    ei_assert(size == rhs.innerSize());

    // allocate a temporary buffer
-    Scalar* buffer = new Scalar[rows];
+    AmbiVector<Scalar> tempVector(rows);

    // estimate the number of non zero entries
    float ratioLhs = float(lhs.nonZeros())/float(lhs.rows()*lhs.cols());
@ -164,89 +157,19 @@ struct ei_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
      //float ratioColRes = std::min(ratioLhs * rhs.innerNonZeros(j), 1.f);
      // FIXME find a nice way to get the number of nonzeros of a sub matrix (here an inner vector)
      float ratioColRes = ratioRes;
-      if (ratioColRes>0.1)
+      tempVector.init(ratioColRes);
+      tempVector.setZero();
+      for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
      {
-        // dense path, the scalar * columns products are accumulated into a dense column
-        Scalar* __restrict__ tmp = buffer;
-        // set to zero
-        for (int k=0; k<rows; ++k)
-          tmp[k] = 0;
-        for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
+        // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
+        Scalar x = rhsIt.value();
+        for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
        {
-          // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
-          Scalar x = rhsIt.value();
-          for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
-          {
-            tmp[lhsIt.index()] += lhsIt.value() * x;
-          }
-        }
-        // copy the temporary to the respective res.col()
-        for (int k=0; k<rows; ++k)
-          if (tmp[k]!=0)
-            res.fill(k, j) = tmp[k];
-      }
-      else
-      {
-        ListEl* __restrict__ tmp = reinterpret_cast<ListEl*>(buffer);
-        // sparse path, the scalar * columns products are accumulated into a linked list
-        int tmp_size = 0;
-        int tmp_start = -1;
-        for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
-        {
-          int tmp_el = tmp_start;
-          for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
-          {
-            Scalar v = lhsIt.value() * rhsIt.value();
-            int id = lhsIt.index();
-            if (tmp_size==0)
-            {
-              tmp_start = 0;
-              tmp_el = 0;
-              tmp_size++;
-              tmp[0].value = v;
-              tmp[0].index = id;
-              tmp[0].next = -1;
-            }
-            else if (id<tmp[tmp_start].index)
-            {
-              tmp[tmp_size].value = v;
-              tmp[tmp_size].index = id;
-              tmp[tmp_size].next = tmp_start;
-              tmp_start = tmp_size;
-              tmp_size++;
-            }
-            else
-            {
-              int nextel = tmp[tmp_el].next;
-              while (nextel >= 0 && tmp[nextel].index<=id)
-              {
-                tmp_el = nextel;
-                nextel = tmp[nextel].next;
-              }
-
-              if (tmp[tmp_el].index==id)
-              {
-                tmp[tmp_el].value += v;
-              }
-              else
-              {
-                tmp[tmp_size].value = v;
-                tmp[tmp_size].index = id;
-                tmp[tmp_size].next = tmp[tmp_el].next;
-                tmp[tmp_el].next = tmp_size;
-                tmp_size++;
-              }
-            }
-          }
-        }
-        int k = tmp_start;
-        while (k>=0)
-        {
-          if (tmp[k].value!=0)
-            res.fill(tmp[k].index, j) = tmp[k].value;
-          k = tmp[k].next;
+          tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;
        }
      }
+      for (typename AmbiVector<Scalar>::Iterator it(tempVector); it; ++it)
+        res.fill(it.index(), j) = it.value();
    }
    res.endFill();
  }
@ -269,7 +192,7 @@ struct ei_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>
 {
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
-    // let's transpose the product and fake the matrices are column major
+    // let's transpose the product to get a column x column product
    ei_sparse_product_selector<Rhs,Lhs,ResultType,ColMajor,ColMajor,ColMajor>::run(rhs, lhs, res);
  }
 };
@ -280,8 +203,11 @@ struct ei_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>
  typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
-    // let's transpose the product and fake the matrices are column major
-    ei_sparse_product_selector<Rhs,Lhs,ResultType,ColMajor,ColMajor,RowMajor>::run(rhs, lhs, res);
+    // let's transpose the product to get a column x column product
+    SparseTemporaryType _res(res.cols(), res.rows());
+    ei_sparse_product_selector<Rhs,Lhs,ResultType,ColMajor,ColMajor,ColMajor>
+      ::run(rhs, lhs, _res);
+    res = _res.transpose();
  }
 };