From 1a1b2e9f27db619303e7f212f9bf5c58a2dd988c Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 10 Jul 2009 10:41:26 +0200
Subject: [PATCH] finally directly calling the low-level products is faster

---
 Eigen/src/Core/GenericPacketMath.h            | 10 +++
 Eigen/src/Core/Product.h                      |  3 +-
 Eigen/src/Core/SolveTriangular.h              | 81 +++++++++----------
 Eigen/src/Core/arch/SSE/PacketMath.h          |  2 +-
 Eigen/src/Core/products/GeneralMatrixMatrix.h |  6 +-
 .../Core/products/SelfadjointMatrixVector.h   |  8 --
 Eigen/src/Core/util/Macros.h                  |  7 ++
 Eigen/src/Core/util/Meta.h                    | 12 +++
 test/cholesky.cpp                             | 20 ++---
 test/main.h                                   |  1 +
 test/product_extra.cpp                        | 53 ++----------
 test/triangular.cpp                           | 54 +++++--------
 12 files changed, 111 insertions(+), 146 deletions(-)
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 693e53f11..77e5641ff 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -245,5 +245,15 @@ inline void ei_palign(PacketType& first, const PacketType& second)
   ei_palign_impl<Offset,PacketType>::run(first,second);
 }
 
+/***************************************************************************
+* Fast complex products (GCC generates a function call which is very slow)
+***************************************************************************/
+
+template<> inline std::complex<float> ei_pmul(const std::complex<float>& a, const std::complex<float>& b)
+{ return std::complex<float>(ei_real(a)*ei_real(b) - ei_imag(a)*ei_imag(b), ei_imag(a)*ei_real(b) + ei_real(a)*ei_imag(b)); }
+
+template<> inline std::complex<double> ei_pmul(const std::complex<double>& a, const std::complex<double>& b)
+{ return std::complex<double>(ei_real(a)*ei_real(b) - ei_imag(a)*ei_imag(b), ei_imag(a)*ei_real(b) + ei_real(a)*ei_imag(b)); }
+
 #endif // EIGEN_GENERIC_PACKET_MATH_H
 
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 8bd6af1b8..bd99bfee8 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -281,8 +281,7 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
       */
     EIGEN_STRONG_INLINE bool _useCacheFriendlyProduct() const
     {
-      #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 16
-      // TODO do something more accurate here
+      // TODO do something more accurate here (especially for mat-vec products)
       return m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
               && (  rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
                  || cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD);
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index b28078fa1..452d40a4c 100644
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -27,7 +27,6 @@
 
 template<typename Lhs, typename Rhs,
   int Mode, // Upper/Lower | UnitDiag
-//   bool ConjugateLhs, bool ConjugateRhs,
   int UpLo = (Mode & LowerTriangularBit)
            ? LowerTriangular
            : (Mode & UpperTriangularBit)
@@ -38,15 +37,20 @@ template<typename Lhs, typename Rhs,
 struct ei_triangular_solver_selector;
 
 // forward substitution, row-major
-template<typename Lhs, typename Rhs, int Mode, /*bool ConjugateLhs, bool ConjugateRhs,*/ int UpLo>
-struct ei_triangular_solver_selector<Lhs,Rhs,Mode,/*ConjugateLhs,ConjugateRhs,*/UpLo,RowMajor>
+template<typename Lhs, typename Rhs, int Mode, int UpLo>
+struct ei_triangular_solver_selector<Lhs,Rhs,Mode,UpLo,RowMajor>
 {
   typedef typename Rhs::Scalar Scalar;
+  typedef ei_product_factor_traits<Lhs> LhsProductTraits;
+  typedef typename LhsProductTraits::ActualXprType ActualLhsType;
+  enum {
+    IsLowerTriangular = (UpLo==LowerTriangular)
+  };
   static void run(const Lhs& lhs, Rhs& other)
-  {//std::cerr << "row maj " << ConjugateLhs << " , " << ConjugateRhs
-//     << " " << typeid(Lhs).name() << "\n";
-    static const int PanelWidth = 40; // TODO make this a user definable constant
-    static const bool IsLowerTriangular = (UpLo==LowerTriangular);
+  {//std::cerr << "row maj " << LhsProductTraits::NeedToConjugate << "\n";
+    static const int PanelWidth = EIGEN_TUNE_TRSV_PANEL_WIDTH;
+    const ActualLhsType& actualLhs = LhsProductTraits::extract(lhs);
+    
     const int size = lhs.cols();
     for(int c=0 ; c<other.cols() ; ++c)
     {
@@ -61,15 +65,12 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,/*ConjugateLhs,ConjugateRhs,*/
         {
           int startRow = IsLowerTriangular ? pi : pi-actualPanelWidth;
           int startCol = IsLowerTriangular ? 0 : pi;
-//           Block<Rhs,Dynamic,1> target(other,startRow,c,actualPanelWidth,1);
-          
-//           ei_cache_friendly_product_rowmajor_times_vector<ConjugateLhs,ConjugateRhs>(
-//             &(lhs.const_cast_derived().coeffRef(startRow,startCol)), lhs.stride(),
-//             &(other.coeffRef(startCol, c)), r,
-//             target, Scalar(-1));
-          other.col(c).segment(startRow,actualPanelWidth) -=
-              lhs.block(startRow,startCol,actualPanelWidth,r)
-            * other.col(c).segment(startCol,r);
+          Block<Rhs,Dynamic,1> target(other,startRow,c,actualPanelWidth,1);
+
+          ei_cache_friendly_product_rowmajor_times_vector<LhsProductTraits::NeedToConjugate,false>(
+            &(actualLhs.const_cast_derived().coeffRef(startRow,startCol)), actualLhs.stride(),
+            &(other.coeffRef(startCol, c)), r,
+            target, Scalar(-1));
         }
 
         for(int k=0; k<actualPanelWidth; ++k)
@@ -83,7 +84,6 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,/*ConjugateLhs,ConjugateRhs,*/
           if(!(Mode & UnitDiagBit))
             other.coeffRef(i,c) /= lhs.coeff(i,i);
         }
-        
       }
     }
   }
@@ -94,17 +94,23 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,/*ConjugateLhs,ConjugateRhs,*/
 //  - inv(LowerTriangular,UnitDiag,ColMajor) * Column vector
 //  - inv(UpperTriangular,         ColMajor) * Column vector
 //  - inv(UpperTriangular,UnitDiag,ColMajor) * Column vector
-template<typename Lhs, typename Rhs, int Mode, /*bool ConjugateLhs, bool ConjugateRhs,*/ int UpLo>
-struct ei_triangular_solver_selector<Lhs,Rhs,Mode,/*ConjugateLhs,ConjugateRhs,*/UpLo,ColMajor>
+template<typename Lhs, typename Rhs, int Mode, int UpLo>
+struct ei_triangular_solver_selector<Lhs,Rhs,Mode,UpLo,ColMajor>
 {
   typedef typename Rhs::Scalar Scalar;
   typedef typename ei_packet_traits<Scalar>::type Packet;
-  enum { PacketSize =  ei_packet_traits<Scalar>::size };
+  typedef ei_product_factor_traits<Lhs> LhsProductTraits;
+  typedef typename LhsProductTraits::ActualXprType ActualLhsType;
+  enum {
+    PacketSize =  ei_packet_traits<Scalar>::size,
+    IsLowerTriangular = (UpLo==LowerTriangular)
+  };
 
   static void run(const Lhs& lhs, Rhs& other)
-  {//std::cerr << "col maj " << ConjugateLhs << " , " << ConjugateRhs << "\n";
-    static const int PanelWidth = 4; // TODO make this a user definable constant
-    static const bool IsLowerTriangular = (UpLo==LowerTriangular);
+  {//std::cerr << "col maj " << LhsProductTraits::NeedToConjugate << "\n";
+    static const int PanelWidth = EIGEN_TUNE_TRSV_PANEL_WIDTH;
+    const ActualLhsType& actualLhs = LhsProductTraits::extract(lhs);
+    
     const int size = lhs.cols();
     for(int c=0 ; c<other.cols() ; ++c)
     {
@@ -133,16 +139,15 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,/*ConjugateLhs,ConjugateRhs,*/
         int r = IsLowerTriangular ? size - endBlock : startBlock; // remaining size
         if (r > 0)
         {
-//           ei_cache_friendly_product_colmajor_times_vector<ConjugateLhs,ConjugateRhs>(
-//             r,
-//             &(lhs.const_cast_derived().coeffRef(endBlock,startBlock)), lhs.stride(),
-//             other.col(c).segment(startBlock, actualPanelWidth),
-//             &(other.coeffRef(endBlock, c)),
-//             Scalar(-1));
-
-          other.col(c).segment(endBlock,r) -=
-              lhs.block(endBlock,startBlock,r,actualPanelWidth)
-            * other.col(c).segment(startBlock,actualPanelWidth);
+          // let's directly call this function because:
+          // 1 - it is faster to compile
+          // 2 - it is slighlty faster at runtime
+          ei_cache_friendly_product_colmajor_times_vector<LhsProductTraits::NeedToConjugate,false>(
+            r,
+            &(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.stride(),
+            other.col(c).segment(startBlock, actualPanelWidth),
+            &(other.coeffRef(endBlock, c)),
+            Scalar(-1));
         }
       }
     }
@@ -168,21 +173,13 @@ void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<RhsDerived>&
   ei_assert(!(Mode & ZeroDiagBit));
   ei_assert(Mode & (UpperTriangularBit|LowerTriangularBit));
 
-//   typedef ei_product_factor_traits<MatrixType> LhsProductTraits;
-//   typedef ei_product_factor_traits<RhsDerived> RhsProductTraits;
-//   typedef typename LhsProductTraits::ActualXprType ActualLhsType;
-//   typedef typename RhsProductTraits::ActualXprType ActualRhsType;
-//   const ActualLhsType& actualLhs = LhsProductTraits::extract(_expression());
-//   ActualRhsType& actualRhs = const_cast<ActualRhsType&>(RhsProductTraits::extract(rhs));
-
   enum { copy = ei_traits<RhsDerived>::Flags & RowMajorBit };
-//   std::cerr << typeid(MatrixType).name() << "\n";
   typedef typename ei_meta_if<copy,
     typename ei_plain_matrix_type_column_major<RhsDerived>::type, RhsDerived&>::ret RhsCopy;
   RhsCopy rhsCopy(rhs);
 
   ei_triangular_solver_selector<MatrixType, typename ei_unref<RhsCopy>::type,
-    Mode/*, LhsProductTraits::NeedToConjugate,RhsProductTraits::NeedToConjugate*/>::run(_expression(), rhsCopy);
+    Mode>::run(_expression(), rhsCopy);
 
   if (copy)
     rhs = rhsCopy;
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 287008b3e..3f1fd8ef5 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -26,7 +26,7 @@
 #define EIGEN_PACKET_MATH_SSE_H
 
 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
-#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 16
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
 #endif
 
 typedef __m128  Packet4f;
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index afd97b340..0036fe390 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -47,8 +47,7 @@ template<> struct ei_conj_helper<false,true>
   { return c + pmul(x,y); }
 
   template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
-  //{ return std::complex<T>(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_imag(x)*ei_real(y) - ei_real(x)*ei_imag(y)); }
-  { return x * ei_conj(y); }
+  { return std::complex<T>(ei_real(x)*ei_real(y) + ei_imag(x)*ei_imag(y), ei_imag(x)*ei_real(y) - ei_real(x)*ei_imag(y)); }
 };
 
 template<> struct ei_conj_helper<true,false>
@@ -68,8 +67,7 @@ template<> struct ei_conj_helper<true,true>
   { return c + pmul(x,y); }
 
   template<typename T> std::complex<T> pmul(const std::complex<T>& x, const std::complex<T>& y) const
-//   { return std::complex<T>(ei_real(x)*ei_real(y) - ei_imag(x)*ei_imag(y), - ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
-  { return ei_conj(x) * ei_conj(y); }
+  { return std::complex<T>(ei_real(x)*ei_real(y) - ei_imag(x)*ei_imag(y), - ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
 };
 
 #ifndef EIGEN_EXTERN_INSTANTIATIONS
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index 65210ed33..fbdeb148f 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -25,14 +25,6 @@
 #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
 #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
 
-template<bool Conjugate> struct ei_conj_if {
-  template<typename Scalar> Scalar operator() (const Scalar& x) const { return ei_conj(x); }
-};
-
-template<> struct ei_conj_if<false> {
-  template<typename Scalar> Scalar& operator() (Scalar& x) const { return x; }
-};
-
 /* Optimized col-major selfadjoint matrix * vector product:
  * This algorithm processes 2 columns at onces that allows to both reduce
  * the number of load/stores of the result by a factor 2 and to reduce
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 38af7caf3..bb73b03cc 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -94,6 +94,13 @@
 #define EIGEN_TUNE_FOR_CPU_CACHE_SIZE (sizeof(float)*256*256)
 #endif
 
+/** Defines the maximal width of the blocks used in the triangular solver
+  * for vectors (level 2 blas xTRSV). The default is 8.
+  */
+#ifndef EIGEN_TUNE_TRSV_PANEL_WIDTH
+#define EIGEN_TUNE_TRSV_PANEL_WIDTH 8
+#endif
+
 /** Allows to disable some optimizations which might affect the accuracy of the result.
   * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
   * They currently include:
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 2f5363275..029d966fd 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -198,4 +198,16 @@ template<typename T> struct ei_is_diagonal<DiagonalWrapper<T> >
 template<typename T, int S> struct ei_is_diagonal<DiagonalMatrix<T,S> >
 { enum { ret = true }; };
 
+template<bool Conjugate> struct ei_conj_if;
+
+template<> struct ei_conj_if<true> {
+  template<typename T>
+  inline T operator()(const T& x) { return ei_conj(x); }
+};
+
+template<> struct ei_conj_if<false> {
+  template<typename T>
+  inline const T& operator()(const T& x) { return x; }
+};
+
 #endif // EIGEN_META_H
diff --git a/test/cholesky.cpp b/test/cholesky.cpp
index 253f67282..4b3952a62 100644
--- a/test/cholesky.cpp
+++ b/test/cholesky.cpp
@@ -142,18 +142,18 @@ template<typename MatrixType> void cholesky_verify_assert()
 void test_cholesky()
 {
   for(int i = 0; i < g_repeat; i++) {
-//     CALL_SUBTEST( cholesky(Matrix<double,1,1>()) );
-//     CALL_SUBTEST( cholesky(MatrixXd(1,1)) );
-//     CALL_SUBTEST( cholesky(Matrix2d()) );
-//     CALL_SUBTEST( cholesky(Matrix3f()) );
+    CALL_SUBTEST( cholesky(Matrix<double,1,1>()) );
+    CALL_SUBTEST( cholesky(MatrixXd(1,1)) );
+    CALL_SUBTEST( cholesky(Matrix2d()) );
+    CALL_SUBTEST( cholesky(Matrix3f()) );
     CALL_SUBTEST( cholesky(Matrix4d()) );
     CALL_SUBTEST( cholesky(MatrixXcd(7,7)) );
-//     CALL_SUBTEST( cholesky(MatrixXd(17,17)) );
-//     CALL_SUBTEST( cholesky(MatrixXf(200,200)) );
+    CALL_SUBTEST( cholesky(MatrixXd(17,17)) );
+    CALL_SUBTEST( cholesky(MatrixXf(200,200)) );
   }
 
-//   CALL_SUBTEST( cholesky_verify_assert<Matrix3f>() );
-//   CALL_SUBTEST( cholesky_verify_assert<Matrix3d>() );
-//   CALL_SUBTEST( cholesky_verify_assert<MatrixXf>() );
-//   CALL_SUBTEST( cholesky_verify_assert<MatrixXd>() );
+  CALL_SUBTEST( cholesky_verify_assert<Matrix3f>() );
+  CALL_SUBTEST( cholesky_verify_assert<Matrix3d>() );
+  CALL_SUBTEST( cholesky_verify_assert<MatrixXf>() );
+  CALL_SUBTEST( cholesky_verify_assert<MatrixXd>() );
 }
diff --git a/test/main.h b/test/main.h
index a6a780603..76572d9b3 100644
--- a/test/main.h
+++ b/test/main.h
@@ -28,6 +28,7 @@
 #include <iostream>
 #include <string>
 #include <vector>
+#include <typeinfo>
 
 #ifndef EIGEN_TEST_FUNC
 #error EIGEN_TEST_FUNC must be defined
diff --git a/test/product_extra.cpp b/test/product_extra.cpp
index d73974886..e750be65e 100644
--- a/test/product_extra.cpp
+++ b/test/product_extra.cpp
@@ -47,8 +47,6 @@ template<typename MatrixType> void product_extra(const MatrixType& m)
              square2 = MatrixType::Random(cols, cols),
              res2 = MatrixType::Random(cols, cols);
   RowVectorType v1 = RowVectorType::Random(rows), vrres(rows);
-//              v2 = RowVectorType::Random(rows),
-//              vzero = RowVectorType::Zero(rows);
   ColVectorType vc2 = ColVectorType::Random(cols), vcres(cols);
   OtherMajorMatrixType tm1 = m1;
 
@@ -56,14 +54,14 @@ template<typename MatrixType> void product_extra(const MatrixType& m)
          s2 = ei_random<Scalar>(),
          s3 = ei_random<Scalar>();
 
-  int c0 = ei_random<int>(0,cols/2-1),
-      c1 = ei_random<int>(cols/2,cols),
-      r0 = ei_random<int>(0,rows/2-1),
-      r1 = ei_random<int>(rows/2,rows);
+//   int c0 = ei_random<int>(0,cols/2-1),
+//       c1 = ei_random<int>(cols/2,cols),
+//       r0 = ei_random<int>(0,rows/2-1),
+//       r1 = ei_random<int>(rows/2,rows);
 
   // all the expressions in this test should be compiled as a single matrix product
   // TODO: add internal checks to verify that
-/*
+
   VERIFY_IS_APPROX(m1 * m2.adjoint(),  m1 * m2.adjoint().eval());
   VERIFY_IS_APPROX(m1.adjoint() * square.adjoint(),  m1.adjoint().eval() * square.adjoint().eval());
   VERIFY_IS_APPROX(m1.adjoint() * m2,  m1.adjoint().eval() * m2);
@@ -111,49 +109,14 @@ template<typename MatrixType> void product_extra(const MatrixType& m)
 
   VERIFY_IS_APPROX((-m1.adjoint() * s2) * (s1 * v1.adjoint()),
                    (-m1.adjoint()*s2).eval() * (s1 * v1.adjoint()).eval());
-  */
-  // test with sub matrices
-  m2 = m1;
-  m3 = m1;
 
-//   std::cerr << (m1.block(r0,c0, r1-r0, c1-c0) * vc2.segment(c0,c1-c0)).rows() << " " << (m1.block(r0,c0, r1-r0, c1-c0) * vc2.segment(c0,c1-c0)).cols() << " == " << vrres.segment(r0,r1-r0).rows() << " " << vrres.segment(r0,r1-r0).cols() << "\n";
-//   m2.col(c0).segment(0,r1-r0) += (m1.block(r0,c0, r1-r0, c1-c0) * vc2.segment(c0,c1-c0)).lazy();
-//   m3.col(c0).segment(0,r1-r0) += (m1.block(r0,c0, r1-r0, c1-c0) * vc2.segment(c0,c1-c0)).eval();
-  Matrix<Scalar,Dynamic,1> a = m2.col(c0), b = a;
-  a.segment(5,r1-r0) += (m1.block(r0,c0, r1-r0, c1-c0) * vc2.segment(c0,c1-c0)).lazy();
-  b.segment(5,r1-r0) += (m1.block(r0,c0, r1-r0, c1-c0) * vc2.segment(c0,c1-c0)).eval();
-
-//   m2.col(c0).segment(0,r1-r0) += (m1.block(r0,c0, r1-r0, c1-c0) * vc2.segment(c0,c1-c0)).lazy();
-//   m3.col(c0).segment(0,r1-r0) += (m1.block(r0,c0, r1-r0, c1-c0) * vc2.segment(c0,c1-c0)).eval();
-//   if (!m2.isApprox(m3))
-  std::cerr << (a-b).cwise().abs().maxCoeff() << "\n";
-  VERIFY_IS_APPROX(a,b);
-//   VERIFY_IS_APPROX( vrres.segment(0,r1-r0).transpose().eval(),
-//                     v1.segment(0,r1-r0).transpose() + m1.block(r0,c0, r1-r0, c1-c0).eval() * (vc2.segment(c0,c1-c0)).eval());
 }
 
 void test_product_extra()
 {
   for(int i = 0; i < g_repeat; i++) {
-    int rows = ei_random<int>(2,10);
-    int cols = ei_random<int>(2,10);
-    int c0 = ei_random<int>(0,cols/2-1),
-        c1 = ei_random<int>(cols/2,cols),
-        r0 = ei_random<int>(0,rows/2-1),
-        r1 = ei_random<int>(rows/2,rows);
-
-    MatrixXf m1 = MatrixXf::Random(rows,cols), m2 = m1;
-    Matrix<float,Dynamic,1> a = m2.col(c0), b = a;
-    Matrix<float,Dynamic,1> vc2 = Matrix<float,Dynamic,1>::Random(cols);
-    if (1+r1-r0<rows) {
-      a.segment(1,r1-r0) += (m1.block(r0,c0, r1-r0, c1-c0) * vc2.segment(c0,c1-c0)).lazy();
-      b.segment(1,r1-r0) += (m1.block(r0,c0, r1-r0, c1-c0) * vc2.segment(c0,c1-c0)).eval();
-      VERIFY_IS_APPROX(a,b);
-    }
-//     CALL_SUBTEST( product_extra(MatrixXf(ei_random<int>(1,320), ei_random<int>(1,320))) );
-//     CALL_SUBTEST( product_extra(MatrixXd(ei_random<int>(1,320), ei_random<int>(1,320))) );
-//     CALL_SUBTEST( product(MatrixXi(ei_random<int>(1,320), ei_random<int>(1,320))) );
-//     CALL_SUBTEST( product_extra(MatrixXcf(ei_random<int>(50,50), ei_random<int>(50,50))) );
-//     CALL_SUBTEST( product(Matrix<float,Dynamic,Dynamic,RowMajor>(ei_random<int>(1,320), ei_random<int>(1,320))) );
+    CALL_SUBTEST( product_extra(MatrixXf(ei_random<int>(1,320), ei_random<int>(1,320))) );
+    CALL_SUBTEST( product_extra(MatrixXcf(ei_random<int>(50,50), ei_random<int>(50,50))) );
+    CALL_SUBTEST( product_extra(Matrix<std::complex<double>,Dynamic,Dynamic,RowMajor>(ei_random<int>(1,50), ei_random<int>(1,50))) );
   }
 }
diff --git a/test/triangular.cpp b/test/triangular.cpp
index 3550d1a74..0c03e987e 100644
--- a/test/triangular.cpp
+++ b/test/triangular.cpp
@@ -1,7 +1,7 @@
 // This file is triangularView of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@gmail.com>
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -81,44 +81,35 @@ template<typename MatrixType> void triangular(const MatrixType& m)
   m1.template triangularView<Eigen::LowerTriangular>() = (m2.transpose() * m2).lazy();
   VERIFY_IS_APPROX(m3.template triangularView<Eigen::LowerTriangular>().toDense(), m1);
 
-  // VERIFY_IS_APPROX(m3.template triangularView<DiagonalBits>(), m3.diagonal().asDiagonal());
-
   m1 = MatrixType::Random(rows, cols);
   for (int i=0; i<rows; ++i)
     while (ei_abs2(m1(i,i))<1e-3) m1(i,i) = ei_random<Scalar>();
 
   Transpose<MatrixType> trm4(m4);
   // test back and forward subsitution
+  m3 = m1.template triangularView<Eigen::UpperTriangular>();
+  VERIFY(m2.isApprox(m3.adjoint() * (m1.adjoint().template triangularView<Eigen::LowerTriangular>().solve(m2)), largerEps));
   m3 = m1.template triangularView<Eigen::LowerTriangular>();
-//   VERIFY(m3.template triangularView<Eigen::LowerTriangular>().solve(m3).cwise().abs().isIdentity(test_precision<RealScalar>()));
-//   VERIFY(m3.transpose().template triangularView<Eigen::UpperTriangular>()
-//     .solve(m3.transpose()).cwise().abs().isIdentity(test_precision<RealScalar>()));
+  VERIFY(m2.isApprox(m3.transpose() * (m1.transpose().template triangularView<Eigen::UpperTriangular>().solve(m2)), largerEps));
+  m3 = m1.template triangularView<Eigen::UpperTriangular>();
+  VERIFY(m2.isApprox(m3 * (m1.template triangularView<Eigen::UpperTriangular>().solve(m2)), largerEps));
+  m3 = m1.template triangularView<Eigen::LowerTriangular>();
+  VERIFY(m2.isApprox(m3.conjugate() * (m1.conjugate().template triangularView<Eigen::LowerTriangular>().solve(m2)), largerEps));
+
   // check M * inv(L) using in place API
   m4 = m3;
   m3.transpose().template triangularView<Eigen::UpperTriangular>().solveInPlace(trm4);
-//   VERIFY(m4.cwise().abs().isIdentity(test_precision<RealScalar>()));
+  VERIFY(m4.cwise().abs().isIdentity(test_precision<RealScalar>()));
 
-//   m3 = m1.template triangularView<Eigen::UpperTriangular>();
-//   VERIFY(m3.template triangularView<Eigen::UpperTriangular>().solve(m3).cwise().abs().isIdentity(test_precision<RealScalar>()));
-//   VERIFY(m3.transpose().template triangularView<Eigen::LowerTriangular>()
-//     .solve(m3.transpose()).cwise().abs().isIdentity(test_precision<RealScalar>()));
-//   // check M * inv(U) using in place API
+  // check M * inv(U) using in place API
+  m3 = m1.template triangularView<Eigen::UpperTriangular>();
   m4 = m3;
   m3.transpose().template triangularView<Eigen::LowerTriangular>().solveInPlace(trm4);
   VERIFY(m4.cwise().abs().isIdentity(test_precision<RealScalar>()));
 
-//   m3 = m1.template triangularView<Eigen::UpperTriangular>();
-//   VERIFY(m2.isApprox(m3 * (m3.template triangularView<Eigen::UpperTriangular>().solve(m2)), largerEps));
-//   m3 = m1.template triangularView<Eigen::LowerTriangular>();
-
-//   std::cerr << (m2 -
-//             (m3 * (m3.template triangularView<Eigen::LowerTriangular>().solve(m2)))).cwise().abs() /*.maxCoeff()*/ << "\n\n";
-
-//   VERIFY(m2.isApprox(m3 * (m3.template triangularView<Eigen::LowerTriangular>().solve(m2)), largerEps));
-
   // check solve with unit diagonal
-//   m3 = m1.template triangularView<Eigen::UnitUpperTriangular>();
-//   VERIFY(m2.isApprox(m3 * (m1.template triangularView<Eigen::UnitUpperTriangular>().solve(m2)), largerEps));
+  m3 = m1.template triangularView<Eigen::UnitUpperTriangular>();
+  VERIFY(m2.isApprox(m3 * (m1.template triangularView<Eigen::UnitUpperTriangular>().solve(m2)), largerEps));
 
 //   VERIFY((  m1.template triangularView<Eigen::UpperTriangular>()
 //           * m2.template triangularView<Eigen::UpperTriangular>()).isUpperTriangular());
@@ -136,17 +127,12 @@ template<typename MatrixType> void triangular(const MatrixType& m)
 void test_triangular()
 {
   for(int i = 0; i < g_repeat ; i++) {
-//     CALL_SUBTEST( triangular(Matrix<float, 1, 1>()) );
-//     CALL_SUBTEST( triangular(Matrix<float, 2, 2>()) );
-//     CALL_SUBTEST( triangular(Matrix3d()) );
-//     CALL_SUBTEST( triangular(MatrixXcf(4, 4)) );
-//     CALL_SUBTEST( triangular(Matrix<std::complex<float>,8, 8>()) );
-//     CALL_SUBTEST( triangular(MatrixXd(1,1)) );
-//     CALL_SUBTEST( triangular(MatrixXd(2,2)) );
-//     CALL_SUBTEST( triangular(MatrixXd(3,3)) );
-//     CALL_SUBTEST( triangular(MatrixXd(5,5)) );
-//     CALL_SUBTEST( triangular(MatrixXd(8,8)) );
+    CALL_SUBTEST( triangular(Matrix<float, 1, 1>()) );
+    CALL_SUBTEST( triangular(Matrix<float, 2, 2>()) );
+    CALL_SUBTEST( triangular(Matrix3d()) );
+    CALL_SUBTEST( triangular(MatrixXcf(4, 4)) );
+    CALL_SUBTEST( triangular(Matrix<std::complex<float>,8, 8>()) );
     CALL_SUBTEST( triangular(MatrixXd(17,17)) );
-//     CALL_SUBTEST( triangular(Matrix<float,Dynamic,Dynamic,RowMajor>(5, 5)) );
+    CALL_SUBTEST( triangular(Matrix<float,Dynamic,Dynamic,RowMajor>(5, 5)) );
   }
 }