Clang-format tests, examples, libraries, benchmarks, etc.

2025-07-02 03:05:10 +08:00 · 2023-12-05 21:22:55 +00:00 · 2023-12-05 21:22:55 +00:00 · 46e9cdb7fe
commit 46e9cdb7fe
parent 3252ecc7a4
876 changed files with 33453 additions and 37795 deletions
--- a/bench/BenchSparseUtil.h
+++ b/bench/BenchSparseUtil.h
@ -20,63 +20,51 @@ using namespace Eigen;
 #endif

 typedef SCALAR Scalar;
-typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
-typedef Matrix<Scalar,Dynamic,1> DenseVector;
+typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
+typedef Matrix<Scalar, Dynamic, 1> DenseVector;
 typedef SparseMatrix<Scalar> EigenSparseMatrix;

-void fillMatrix(float density, int rows, int cols,  EigenSparseMatrix& dst)
-{
-  dst.reserve(double(rows)*cols*density);
-  for(int j = 0; j < cols; j++)
-  {
-    for(int i = 0; i < rows; i++)
-    {
-      Scalar v = (internal::random<float>(0,1) < density) ? internal::random<Scalar>() : 0;
-      if (v!=0)
-        dst.insert(i,j) = v;
+void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst) {
+  dst.reserve(double(rows) * cols * density);
+  for (int j = 0; j < cols; j++) {
+    for (int i = 0; i < rows; i++) {
+      Scalar v = (internal::random<float>(0, 1) < density) ? internal::random<Scalar>() : 0;
+      if (v != 0) dst.insert(i, j) = v;
    }
  }
  dst.finalize();
 }

-void fillMatrix2(int nnzPerCol, int rows, int cols,  EigenSparseMatrix& dst)
-{
-//   std::cout << "alloc " << nnzPerCol*cols << "\n";
-  dst.reserve(nnzPerCol*cols);
-  for(int j = 0; j < cols; j++)
-  {
+void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst) {
+  //   std::cout << "alloc " << nnzPerCol*cols << "\n";
+  dst.reserve(nnzPerCol * cols);
+  for (int j = 0; j < cols; j++) {
    std::set<int> aux;
-    for(int i = 0; i < nnzPerCol; i++)
-    {
-      int k = internal::random<int>(0,rows-1);
-      while (aux.find(k)!=aux.end())
-        k = internal::random<int>(0,rows-1);
+    for (int i = 0; i < nnzPerCol; i++) {
+      int k = internal::random<int>(0, rows - 1);
+      while (aux.find(k) != aux.end()) k = internal::random<int>(0, rows - 1);
      aux.insert(k);

-      dst.insert(k,j) = internal::random<Scalar>();
+      dst.insert(k, j) = internal::random<Scalar>();
    }
  }
  dst.finalize();
 }

-void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst)
-{
+void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst) {
  dst.setZero();
-  for (int j=0; j<src.cols(); ++j)
-    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
-      dst(it.index(),j) = it.value();
+  for (int j = 0; j < src.cols(); ++j)
+    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value();
 }

 #ifndef NOGMM
 #include "gmm/gmm.h"
 typedef gmm::csc_matrix<Scalar> GmmSparse;
-typedef gmm::col_matrix< gmm::wsvector<Scalar> > GmmDynSparse;
-void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst)
-{
+typedef gmm::col_matrix<gmm::wsvector<Scalar> > GmmDynSparse;
+void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst) {
  GmmDynSparse tmp(src.rows(), src.cols());
-  for (int j=0; j<src.cols(); ++j)
-    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
-      tmp(it.index(),j) = it.value();
+  for (int j = 0; j < src.cols(); ++j)
+    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) tmp(it.index(), j) = it.value();
  gmm::copy(tmp, dst);
 }
 #endif
@ -85,12 +73,10 @@ void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst)
 #include <boost/numeric/mtl/mtl.hpp>
 typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::col_major> > MtlSparse;
 typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::row_major> > MtlSparseRowMajor;
-void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst)
-{
+void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst) {
  mtl::matrix::inserter<MtlSparse> ins(dst);
-  for (int j=0; j<src.cols(); ++j)
-    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
-      ins[it.index()][j] = it.value();
+  for (int j = 0; j < src.cols(); ++j)
+    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) ins[it.index()][j] = it.value();
 }
 #endif

@ -98,20 +84,18 @@ void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst)
 extern "C" {
 #include "cs.h"
 }
-void eiToCSparse(const EigenSparseMatrix& src, cs* &dst)
-{
-  cs* aux = cs_spalloc (0, 0, 1, 1, 1);
-  for (int j=0; j<src.cols(); ++j)
+void eiToCSparse(const EigenSparseMatrix& src, cs*& dst) {
+  cs* aux = cs_spalloc(0, 0, 1, 1, 1);
+  for (int j = 0; j < src.cols(); ++j)
    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
-      if (!cs_entry(aux, it.index(), j, it.value()))
-      {
+      if (!cs_entry(aux, it.index(), j, it.value())) {
        std::cout << "cs_entry error\n";
        exit(2);
      }
-   dst = cs_compress(aux);
-//    cs_spfree(aux);
+  dst = cs_compress(aux);
+  //    cs_spfree(aux);
 }
-#endif // CSPARSE
+#endif  // CSPARSE

 #ifndef NOUBLAS
 #include <boost/numeric/ublas/vector.hpp>
@ -123,22 +107,18 @@ void eiToCSparse(const EigenSparseMatrix& src, cs* &dst)
 #include <boost/numeric/ublas/vector_of_vector.hpp>
 #include <boost/numeric/ublas/operation.hpp>

-typedef boost::numeric::ublas::compressed_matrix<Scalar,boost::numeric::ublas::column_major> UBlasSparse;
+typedef boost::numeric::ublas::compressed_matrix<Scalar, boost::numeric::ublas::column_major> UBlasSparse;

-void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst)
-{
+void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst) {
  dst.resize(src.rows(), src.cols(), false);
-  for (int j=0; j<src.cols(); ++j)
-    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
-      dst(it.index(),j) = it.value();
+  for (int j = 0; j < src.cols(); ++j)
+    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value();
 }

 template <typename EigenType, typename UblasType>
-void eiToUblasVec(const EigenType& src, UblasType& dst)
-{
+void eiToUblasVec(const EigenType& src, UblasType& dst) {
  dst.resize(src.size());
-  for (int j=0; j<src.size(); ++j)
-      dst[j] = src.coeff(j);
+  for (int j = 0; j < src.size(); ++j) dst[j] = src.coeff(j);
 }
 #endif

--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@ -12,19 +12,19 @@
 #define EIGEN_BENCH_TIMERR_H

 #if defined(_WIN32) || defined(__CYGWIN__)
-# ifndef NOMINMAX
-#   define NOMINMAX
-#   define EIGEN_BT_UNDEF_NOMINMAX
-# endif
-# ifndef WIN32_LEAN_AND_MEAN
-#   define WIN32_LEAN_AND_MEAN
-#   define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
-# endif
-# include <windows.h>
+#ifndef NOMINMAX
+#define NOMINMAX
+#define EIGEN_BT_UNDEF_NOMINMAX
+#endif
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
 #elif defined(__APPLE__)
 #include <mach/mach_time.h>
 #else
-# include <unistd.h>
+#include <unistd.h>
 #endif

 static void escape(void *p) {
@ -41,27 +41,20 @@ static void clobber() {

 #include <Eigen/Core>

-namespace Eigen
-{
+namespace Eigen {

-enum {
-  CPU_TIMER = 0,
-  REAL_TIMER = 1
-};
+enum { CPU_TIMER = 0, REAL_TIMER = 1 };

 /** Elapsed time timer keeping the best try.
-  *
-  * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID.
-  * On Windows we use QueryPerformanceCounter
-  *
-  * Important: on linux, you must link with -lrt
-  */
-class BenchTimer
-{
-public:
-
-  BenchTimer()
-  {
+ *
+ * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID.
+ * On Windows we use QueryPerformanceCounter
+ *
+ * Important: on linux, you must link with -lrt
+ */
+class BenchTimer {
+ public:
+  BenchTimer() {
 #if defined(_WIN32) || defined(__CYGWIN__)
    LARGE_INTEGER freq;
    QueryPerformanceFrequency(&freq);
@ -72,69 +65,53 @@ public:

  ~BenchTimer() {}

-  inline void reset()
-  {
+  inline void reset() {
    m_bests.fill(1e9);
    m_worsts.fill(0);
    m_totals.setZero();
  }
-  inline void start()
-  {
-    m_starts[CPU_TIMER]  = getCpuTime();
+  inline void start() {
+    m_starts[CPU_TIMER] = getCpuTime();
    m_starts[REAL_TIMER] = getRealTime();
  }
-  inline void stop()
-  {
+  inline void stop() {
    m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER];
    m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER];
-    #if EIGEN_VERSION_AT_LEAST(2,90,0)
+#if EIGEN_VERSION_AT_LEAST(2, 90, 0)
    m_bests = m_bests.cwiseMin(m_times);
    m_worsts = m_worsts.cwiseMax(m_times);
-    #else
-    m_bests(0) = std::min(m_bests(0),m_times(0));
-    m_bests(1) = std::min(m_bests(1),m_times(1));
-    m_worsts(0) = std::max(m_worsts(0),m_times(0));
-    m_worsts(1) = std::max(m_worsts(1),m_times(1));
-    #endif
+#else
+    m_bests(0) = std::min(m_bests(0), m_times(0));
+    m_bests(1) = std::min(m_bests(1), m_times(1));
+    m_worsts(0) = std::max(m_worsts(0), m_times(0));
+    m_worsts(1) = std::max(m_worsts(1), m_times(1));
+#endif
    m_totals += m_times;
  }

  /** Return the elapsed time in seconds between the last start/stop pair
-    */
-  inline double value(int TIMER = CPU_TIMER) const
-  {
-    return m_times[TIMER];
-  }
+   */
+  inline double value(int TIMER = CPU_TIMER) const { return m_times[TIMER]; }

  /** Return the best elapsed time in seconds
-    */
-  inline double best(int TIMER = CPU_TIMER) const
-  {
-    return m_bests[TIMER];
-  }
+   */
+  inline double best(int TIMER = CPU_TIMER) const { return m_bests[TIMER]; }

  /** Return the worst elapsed time in seconds
-    */
-  inline double worst(int TIMER = CPU_TIMER) const
-  {
-    return m_worsts[TIMER];
-  }
+   */
+  inline double worst(int TIMER = CPU_TIMER) const { return m_worsts[TIMER]; }

  /** Return the total elapsed time in seconds.
-    */
-  inline double total(int TIMER = CPU_TIMER) const
-  {
-    return m_totals[TIMER];
-  }
+   */
+  inline double total(int TIMER = CPU_TIMER) const { return m_totals[TIMER]; }

-  inline double getCpuTime() const
-  {
+  inline double getCpuTime() const {
 #ifdef _WIN32
    LARGE_INTEGER query_ticks;
    QueryPerformanceCounter(&query_ticks);
-    return query_ticks.QuadPart/m_frequency;
+    return query_ticks.QuadPart / m_frequency;
 #elif __APPLE__
-    return double(mach_absolute_time())*1e-9;
+    return double(mach_absolute_time()) * 1e-9;
 #else
    timespec ts;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
@ -142,14 +119,13 @@ public:
 #endif
  }

-  inline double getRealTime() const
-  {
+  inline double getRealTime() const {
 #ifdef _WIN32
    SYSTEMTIME st;
    GetSystemTime(&st);
    return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds;
 #elif __APPLE__
-    return double(mach_absolute_time())*1e-9;
+    return double(mach_absolute_time()) * 1e-9;
 #else
    timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
@ -157,7 +133,7 @@ public:
 #endif
  }

-protected:
+ protected:
 #if defined(_WIN32) || defined(__CYGWIN__)
  double m_frequency;
 #endif
@ -167,33 +143,34 @@ protected:
  Vector2d m_worsts;
  Vector2d m_totals;

-public:
+ public:
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 };

-#define BENCH(TIMER,TRIES,REP,CODE) { \
-    TIMER.reset(); \
-    for(int uglyvarname1=0; uglyvarname1<TRIES; ++uglyvarname1){ \
-      TIMER.start(); \
-      for(int uglyvarname2=0; uglyvarname2<REP; ++uglyvarname2){ \
-        CODE; \
-      } \
-      TIMER.stop(); \
-      clobber(); \
-    } \
+#define BENCH(TIMER, TRIES, REP, CODE)                                 \
+  {                                                                    \
+    TIMER.reset();                                                     \
+    for (int uglyvarname1 = 0; uglyvarname1 < TRIES; ++uglyvarname1) { \
+      TIMER.start();                                                   \
+      for (int uglyvarname2 = 0; uglyvarname2 < REP; ++uglyvarname2) { \
+        CODE;                                                          \
+      }                                                                \
+      TIMER.stop();                                                    \
+      clobber();                                                       \
+    }                                                                  \
  }

-}
+}  // namespace Eigen

 // clean #defined tokens
 #ifdef EIGEN_BT_UNDEF_NOMINMAX
-# undef EIGEN_BT_UNDEF_NOMINMAX
-# undef NOMINMAX
+#undef EIGEN_BT_UNDEF_NOMINMAX
+#undef NOMINMAX
 #endif

 #ifdef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
-# undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
-# undef WIN32_LEAN_AND_MEAN
+#undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
+#undef WIN32_LEAN_AND_MEAN
 #endif

-#endif // EIGEN_BENCH_TIMERR_H
+#endif  // EIGEN_BENCH_TIMERR_H
--- a/bench/BenchUtil.h
+++ b/bench/BenchUtil.h
@ -18,54 +18,52 @@ using namespace Eigen;
 #include <boost/preprocessor/punctuation/comma.hpp>
 #include <boost/preprocessor/stringize.hpp>

-template<typename MatrixType> void initMatrix_random(MatrixType& mat) __attribute__((noinline));
-template<typename MatrixType> void initMatrix_random(MatrixType& mat)
-{
-  mat.setRandom();// = MatrixType::random(mat.rows(), mat.cols());
+template <typename MatrixType>
+void initMatrix_random(MatrixType& mat) __attribute__((noinline));
+template <typename MatrixType>
+void initMatrix_random(MatrixType& mat) {
+  mat.setRandom();  // = MatrixType::random(mat.rows(), mat.cols());
 }

-template<typename MatrixType> void initMatrix_identity(MatrixType& mat) __attribute__((noinline));
-template<typename MatrixType> void initMatrix_identity(MatrixType& mat)
-{
+template <typename MatrixType>
+void initMatrix_identity(MatrixType& mat) __attribute__((noinline));
+template <typename MatrixType>
+void initMatrix_identity(MatrixType& mat) {
  mat.setIdentity();
 }

 #ifndef __INTEL_COMPILER
-#define DISABLE_SSE_EXCEPTIONS()  { \
-  int aux; \
-  asm( \
-  "stmxcsr   %[aux]           \n\t" \
-  "orl       $32832, %[aux]   \n\t" \
-  "ldmxcsr   %[aux]           \n\t" \
-  : : [aux] "m" (aux)); \
-}
+#define DISABLE_SSE_EXCEPTIONS()          \
+  {                                       \
+    int aux;                              \
+    asm("stmxcsr   %[aux]           \n\t" \
+        "orl       $32832, %[aux]   \n\t" \
+        "ldmxcsr   %[aux]           \n\t" \
+        :                                 \
+        : [aux] "m"(aux));                \
+  }
 #else
-#define DISABLE_SSE_EXCEPTIONS()  
+#define DISABLE_SSE_EXCEPTIONS()
 #endif

 #ifdef BENCH_GMM
 #include <gmm/gmm.h>
 template <typename EigenMatrixType, typename GmmMatrixType>
-void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst)
-{
-  dst.resize(src.rows(),src.cols());
-  for (int j=0; j<src.cols(); ++j)
-    for (int i=0; i<src.rows(); ++i)
-      dst(i,j) = src.coeff(i,j);
+void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst) {
+  dst.resize(src.rows(), src.cols());
+  for (int j = 0; j < src.cols(); ++j)
+    for (int i = 0; i < src.rows(); ++i) dst(i, j) = src.coeff(i, j);
 }
 #endif

-
 #ifdef BENCH_GSL
 #include <gsl/gsl_matrix.h>
 #include <gsl/gsl_linalg.h>
 #include <gsl/gsl_eigen.h>
 template <typename EigenMatrixType>
-void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
-{
-  for (int j=0; j<src.cols(); ++j)
-    for (int i=0; i<src.rows(); ++i)
-      gsl_matrix_set(*dst, i, j, src.coeff(i,j));
+void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst) {
+  for (int j = 0; j < src.cols(); ++j)
+    for (int i = 0; i < src.rows(); ++i) gsl_matrix_set(*dst, i, j, src.coeff(i, j));
 }
 #endif

@ -73,20 +71,16 @@ void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
 #include <boost/numeric/ublas/matrix.hpp>
 #include <boost/numeric/ublas/vector.hpp>
 template <typename EigenMatrixType, typename UblasMatrixType>
-void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst)
-{
-  dst.resize(src.rows(),src.cols());
-  for (int j=0; j<src.cols(); ++j)
-    for (int i=0; i<src.rows(); ++i)
-      dst(i,j) = src.coeff(i,j);
+void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst) {
+  dst.resize(src.rows(), src.cols());
+  for (int j = 0; j < src.cols(); ++j)
+    for (int i = 0; i < src.rows(); ++i) dst(i, j) = src.coeff(i, j);
 }
 template <typename EigenType, typename UblasType>
-void eiToUblasVec(const EigenType& src, UblasType& dst)
-{
+void eiToUblasVec(const EigenType& src, UblasType& dst) {
  dst.resize(src.size());
-  for (int j=0; j<src.size(); ++j)
-      dst[j] = src.coeff(j);
+  for (int j = 0; j < src.size(); ++j) dst[j] = src.coeff(j);
 }
 #endif

-#endif // EIGEN_BENCH_UTIL_H
+#endif  // EIGEN_BENCH_UTIL_H
--- a/bench/analyze-blocking-sizes.cpp
+++ b/bench/analyze-blocking-sizes.cpp
@ -37,20 +37,17 @@ uint8_t log2_pot(size_t x) {
  return l;
 }

-uint16_t compact_size_triple(size_t k, size_t m, size_t n)
-{
+uint16_t compact_size_triple(size_t k, size_t m, size_t n) {
  return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
 }

 // just a helper to store a triple of K,M,N sizes for matrix product
-struct size_triple_t
-{
+struct size_triple_t {
  uint16_t k, m, n;
  size_triple_t() : k(0), m(0), n(0) {}
  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
  size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}
-  size_triple_t(uint16_t compact)
-  {
+  size_triple_t(uint16_t compact) {
    k = 1 << ((compact & 0xf00) >> 8);
    m = 1 << ((compact & 0x0f0) >> 4);
    n = 1 << ((compact & 0x00f) >> 0);
@ -58,35 +55,23 @@ struct size_triple_t
  bool is_cubic() const { return k == m && m == n; }
 };

-ostream& operator<<(ostream& s, const size_triple_t& t)
-{
-  return s << "(" << t.k << ", " << t.m << ", " << t.n << ")";
-}
+ostream& operator<<(ostream& s, const size_triple_t& t) { return s << "(" << t.k << ", " << t.m << ", " << t.n << ")"; }

-struct inputfile_entry_t
-{
+struct inputfile_entry_t {
  uint16_t product_size;
  uint16_t pot_block_size;
  size_triple_t nonpot_block_size;
  float gflops;
 };

-struct inputfile_t
-{
-  enum class type_t {
-    unknown,
-    all_pot_sizes,
-    default_sizes
-  };
+struct inputfile_t {
+  enum class type_t { unknown, all_pot_sizes, default_sizes };

  string filename;
  vector<inputfile_entry_t> entries;
  type_t type;

-  inputfile_t(const string& fname)
-    : filename(fname)
-    , type(type_t::unknown)
-  {
+  inputfile_t(const string& fname) : filename(fname), type(type_t::unknown) {
    ifstream stream(filename);
    if (!stream.is_open()) {
      cerr << "couldn't open input file: " << filename << endl;
@ -111,27 +96,17 @@ struct inputfile_t
        type = type_t::default_sizes;
        continue;
      }
-      

      if (type == type_t::unknown) {
        continue;
      }
-      switch(type) {
+      switch (type) {
        case type_t::all_pot_sizes: {
          unsigned int product_size, block_size;
          float gflops;
-          int sscanf_result =
-            sscanf(line.c_str(), "%x %x %f",
-                   &product_size,
-                   &block_size,
-                   &gflops);
-          if (3 != sscanf_result ||
-              !product_size ||
-              product_size > 0xfff ||
-              !block_size ||
-              block_size > 0xfff ||
-              !isfinite(gflops))
-          {
+          int sscanf_result = sscanf(line.c_str(), "%x %x %f", &product_size, &block_size, &gflops);
+          if (3 != sscanf_result || !product_size || product_size > 0xfff || !block_size || block_size > 0xfff ||
+              !isfinite(gflops)) {
            cerr << "ill-formed input file: " << filename << endl;
            cerr << "offending line:" << endl << line << endl;
            exit(1);
@ -150,16 +125,8 @@ struct inputfile_t
          unsigned int product_size;
          float gflops;
          int bk, bm, bn;
-          int sscanf_result =
-            sscanf(line.c_str(), "%x default(%d, %d, %d) %f",
-                   &product_size,
-                   &bk, &bm, &bn,
-                   &gflops);
-          if (5 != sscanf_result ||
-              !product_size ||
-              product_size > 0xfff ||
-              !isfinite(gflops))
-          {
+          int sscanf_result = sscanf(line.c_str(), "%x default(%d, %d, %d) %f", &product_size, &bk, &bm, &bn, &gflops);
+          if (5 != sscanf_result || !product_size || product_size > 0xfff || !isfinite(gflops)) {
            cerr << "ill-formed input file: " << filename << endl;
            cerr << "offending line:" << endl << line << endl;
            exit(1);
@ -175,7 +142,7 @@ struct inputfile_t
          entries.push_back(entry);
          break;
        }
-        
+
        default:
          break;
      }
@ -192,27 +159,22 @@ struct inputfile_t
  }
 };

-struct preprocessed_inputfile_entry_t
-{
+struct preprocessed_inputfile_entry_t {
  uint16_t product_size;
  uint16_t block_size;

  float efficiency;
 };

-bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2)
-{
+bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2) {
  return e1.efficiency < e2.efficiency;
 }

-struct preprocessed_inputfile_t
-{
+struct preprocessed_inputfile_t {
  string filename;
  vector<preprocessed_inputfile_entry_t> entries;

-  preprocessed_inputfile_t(const inputfile_t& inputfile)
-    : filename(inputfile.filename)
-  {
+  preprocessed_inputfile_t(const inputfile_t& inputfile) : filename(inputfile.filename) {
    if (inputfile.type != inputfile_t::type_t::all_pot_sizes) {
      abort();
    }
@ -220,20 +182,16 @@ struct preprocessed_inputfile_t
    auto it_first_with_given_product_size = it;
    while (it != inputfile.entries.end()) {
      ++it;
-      if (it == inputfile.entries.end() ||
-        it->product_size != it_first_with_given_product_size->product_size)
-      {
+      if (it == inputfile.entries.end() || it->product_size != it_first_with_given_product_size->product_size) {
        import_input_file_range_one_product_size(it_first_with_given_product_size, it);
        it_first_with_given_product_size = it;
      }
    }
  }

-private:
-  void import_input_file_range_one_product_size(
-    const vector<inputfile_entry_t>::const_iterator& begin,
-    const vector<inputfile_entry_t>::const_iterator& end)
-  {
+ private:
+  void import_input_file_range_one_product_size(const vector<inputfile_entry_t>::const_iterator& begin,
+                                                const vector<inputfile_entry_t>::const_iterator& end) {
    uint16_t product_size = begin->product_size;
    float max_gflops = 0.0f;
    for (auto it = begin; it != end; ++it) {
@ -254,9 +212,7 @@ private:
  }
 };

-void check_all_files_in_same_exact_order(
-       const vector<preprocessed_inputfile_t>& preprocessed_inputfiles)
-{
+void check_all_files_in_same_exact_order(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles) {
  if (preprocessed_inputfiles.empty()) {
    return;
  }
@ -266,11 +222,8 @@ void check_all_files_in_same_exact_order(

  for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) {
    if (preprocessed_inputfiles[i].entries.size() != num_entries) {
-      cerr << "these files have different number of entries: "
-           << preprocessed_inputfiles[i].filename
-           << " and "
-           << first_file.filename
-           << endl;
+      cerr << "these files have different number of entries: " << preprocessed_inputfiles[i].filename << " and "
+           << first_file.filename << endl;
      exit(1);
    }
  }
@ -281,12 +234,8 @@ void check_all_files_in_same_exact_order(
    for (size_t file_index = 0; file_index < preprocessed_inputfiles.size(); file_index++) {
      const preprocessed_inputfile_t& cur_file = preprocessed_inputfiles[file_index];
      if (cur_file.entries[entry_index].product_size != entry_product_size ||
-          cur_file.entries[entry_index].block_size != entry_block_size)
-      {
-        cerr << "entries not in same order between these files: "
-             << first_file.filename
-             << " and "
-             << cur_file.filename
+          cur_file.entries[entry_index].block_size != entry_block_size) {
+        cerr << "entries not in same order between these files: " << first_file.filename << " and " << cur_file.filename
             << endl;
        exit(1);
      }
@ -294,10 +243,8 @@ void check_all_files_in_same_exact_order(
  }
 }

-float efficiency_of_subset(
-        const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-        const vector<size_t>& subset)
-{
+float efficiency_of_subset(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
+                           const vector<size_t>& subset) {
  if (subset.size() <= 1) {
    return 1.0f;
  }
@ -309,9 +256,7 @@ float efficiency_of_subset(
  uint16_t product_size = first_file.entries[0].product_size;
  while (entry_index < num_entries) {
    ++entry_index;
-    if (entry_index == num_entries ||
-        first_file.entries[entry_index].product_size != product_size)
-    {
+    if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) {
      float efficiency_this_product_size = 0.0f;
      for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
        float efficiency_this_entry = 1.0f;
@ -331,10 +276,8 @@ float efficiency_of_subset(
  return efficiency;
 }

-void dump_table_for_subset(
-        const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-        const vector<size_t>& subset)
-{
+void dump_table_for_subset(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
+                           const vector<size_t>& subset) {
  const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
  const size_t num_entries = first_file.entries.size();
  size_t entry_index = 0;
@ -359,9 +302,7 @@ void dump_table_for_subset(
  cout << "    static const unsigned short data[" << TableSize << "] = {";
  while (entry_index < num_entries) {
    ++entry_index;
-    if (entry_index == num_entries ||
-        first_file.entries[entry_index].product_size != product_size)
-    {
+    if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) {
      float best_efficiency_this_product_size = 0.0f;
      uint16_t best_block_size_this_product_size = 0;
      for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
@ -397,10 +338,8 @@ void dump_table_for_subset(
  cout << "};" << endl;
 }

-float efficiency_of_partition(
-        const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-        const vector<vector<size_t>>& partition)
-{
+float efficiency_of_partition(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
+                              const vector<vector<size_t>>& partition) {
  float efficiency = 1.0f;
  for (auto s = partition.begin(); s != partition.end(); ++s) {
    efficiency = min(efficiency, efficiency_of_subset(preprocessed_inputfiles, *s));
@ -408,8 +347,7 @@ float efficiency_of_partition(
  return efficiency;
 }

-void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t set_size)
-{
+void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t set_size) {
  assert(subset_size >= 1 && subset_size <= set_size);
  out_subset.resize(subset_size);
  for (size_t i = 0; i < subset_size; i++) {
@ -417,13 +355,9 @@ void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t se
  }
 }

-bool is_last_subset(const vector<size_t>& subset, size_t set_size)
-{
-  return subset[0] == set_size - subset.size();
-}
+bool is_last_subset(const vector<size_t>& subset, size_t set_size) { return subset[0] == set_size - subset.size(); }

-void next_subset(vector<size_t>& inout_subset, size_t set_size)
-{
+void next_subset(vector<size_t>& inout_subset, size_t set_size) {
  if (is_last_subset(inout_subset, set_size)) {
    cerr << "iterating past the last subset" << endl;
    abort();
@ -444,9 +378,8 @@ void next_subset(vector<size_t>& inout_subset, size_t set_size)
 const size_t number_of_subsets_limit = 100;
 const size_t always_search_subsets_of_size_at_least = 2;

-bool is_number_of_subsets_feasible(size_t n, size_t p)
-{ 
-  assert(n>0 && p>0 && p<=n);
+bool is_number_of_subsets_feasible(size_t n, size_t p) {
+  assert(n > 0 && p > 0 && p <= n);
  uint64_t numerator = 1, denominator = 1;
  for (size_t i = 0; i < p; i++) {
    numerator *= n - i;
@ -458,24 +391,20 @@ bool is_number_of_subsets_feasible(size_t n, size_t p)
  return true;
 }

-size_t max_feasible_subset_size(size_t n)
-{
+size_t max_feasible_subset_size(size_t n) {
  assert(n > 0);
-  const size_t minresult = min<size_t>(n-1, always_search_subsets_of_size_at_least);
+  const size_t minresult = min<size_t>(n - 1, always_search_subsets_of_size_at_least);
  for (size_t p = 1; p <= n - 1; p++) {
-    if (!is_number_of_subsets_feasible(n, p+1)) {
+    if (!is_number_of_subsets_feasible(n, p + 1)) {
      return max(p, minresult);
    }
  }
  return n - 1;
 }

-void find_subset_with_efficiency_higher_than(
-       const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-       float required_efficiency_to_beat,
-       vector<size_t>& inout_remainder,
-       vector<size_t>& out_subset)
-{
+void find_subset_with_efficiency_higher_than(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
+                                             float required_efficiency_to_beat, vector<size_t>& inout_remainder,
+                                             vector<size_t>& out_subset) {
  out_subset.resize(0);

  if (required_efficiency_to_beat >= 1.0f) {
@ -484,7 +413,6 @@ void find_subset_with_efficiency_higher_than(
  }

  while (!inout_remainder.empty()) {
-
    vector<size_t> candidate_indices(inout_remainder.size());
    for (size_t i = 0; i < candidate_indices.size(); i++) {
      candidate_indices[i] = i;
@ -493,20 +421,17 @@ void find_subset_with_efficiency_higher_than(
    size_t candidate_indices_subset_size = max_feasible_subset_size(candidate_indices.size());
    while (candidate_indices_subset_size >= 1) {
      vector<size_t> candidate_indices_subset;
-      make_first_subset(candidate_indices_subset_size,
-                        candidate_indices_subset,
-                        candidate_indices.size());
+      make_first_subset(candidate_indices_subset_size, candidate_indices_subset, candidate_indices.size());

      vector<size_t> best_candidate_indices_subset;
      float best_efficiency = 0.0f;
      vector<size_t> trial_subset = out_subset;
      trial_subset.resize(out_subset.size() + candidate_indices_subset_size);
-      while (true)
-      {
+      while (true) {
        for (size_t i = 0; i < candidate_indices_subset_size; i++) {
          trial_subset[out_subset.size() + i] = inout_remainder[candidate_indices_subset[i]];
        }
-        
+
        float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset);
        if (trial_efficiency > best_efficiency) {
          best_efficiency = trial_efficiency;
@ -517,7 +442,7 @@ void find_subset_with_efficiency_higher_than(
        }
        next_subset(candidate_indices_subset, candidate_indices.size());
      }
-       
+
      if (best_efficiency > required_efficiency_to_beat) {
        for (size_t i = 0; i < best_candidate_indices_subset.size(); i++) {
          candidate_indices[i] = candidate_indices[best_candidate_indices_subset[i]];
@ -526,7 +451,7 @@ void find_subset_with_efficiency_higher_than(
      }
      candidate_indices_subset_size--;
    }
-      
+
    size_t candidate_index = candidate_indices[0];
    auto candidate_iterator = inout_remainder.begin() + candidate_index;
    vector<size_t> trial_subset = out_subset;
@ -542,11 +467,9 @@ void find_subset_with_efficiency_higher_than(
  }
 }

-void find_partition_with_efficiency_higher_than(
-       const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-       float required_efficiency_to_beat,
-       vector<vector<size_t>>& out_partition)
-{
+void find_partition_with_efficiency_higher_than(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
+                                                float required_efficiency_to_beat,
+                                                vector<vector<size_t>>& out_partition) {
  out_partition.resize(0);

  vector<size_t> remainder;
@ -556,25 +479,19 @@ void find_partition_with_efficiency_higher_than(

  while (!remainder.empty()) {
    vector<size_t> new_subset;
-    find_subset_with_efficiency_higher_than(
-      preprocessed_inputfiles,
-      required_efficiency_to_beat,
-      remainder,
-      new_subset);
+    find_subset_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, remainder,
+                                            new_subset);
    out_partition.push_back(new_subset);
  }
 }

-void print_partition(
-       const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
-       const vector<vector<size_t>>& partition)
-{
+void print_partition(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
+                     const vector<vector<size_t>>& partition) {
  float efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
-  cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency"  << endl;
+  cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl;
  for (auto subset = partition.begin(); subset != partition.end(); ++subset) {
-    cout << "  Subset " << (subset - partition.begin())
-         << ", efficiency " << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:"
-         << endl;
+    cout << "  Subset " << (subset - partition.begin()) << ", efficiency "
+         << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:" << endl;
    for (auto file = subset->begin(); file != subset->end(); ++file) {
      cout << "    " << preprocessed_inputfiles[*file].filename << endl;
    }
@ -586,18 +503,18 @@ void print_partition(
  cout << endl;
 }

-struct action_t
-{
-  virtual const char* invokation_name() const { abort(); return nullptr; }
+struct action_t {
+  virtual const char* invokation_name() const {
+    abort();
+    return nullptr;
+  }
  virtual void run(const vector<string>&) const { abort(); }
  virtual ~action_t() {}
 };

-struct partition_action_t : action_t
-{
+struct partition_action_t : action_t {
  virtual const char* invokation_name() const override { return "partition"; }
-  virtual void run(const vector<string>& input_filenames) const override
-  {
+  virtual void run(const vector<string>& input_filenames) const override {
    vector<preprocessed_inputfile_t> preprocessed_inputfiles;

    if (input_filenames.empty()) {
@ -627,17 +544,12 @@ struct partition_action_t : action_t
    float required_efficiency_to_beat = 0.0f;
    vector<vector<vector<size_t>>> partitions;
    cerr << "searching for partitions...\r" << flush;
-    while (true)
-    {
+    while (true) {
      vector<vector<size_t>> partition;
-      find_partition_with_efficiency_higher_than(
-        preprocessed_inputfiles,
-        required_efficiency_to_beat,
-        partition);
+      find_partition_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, partition);
      float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
-      cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size()
-           << " subsets for " << 100.0f * actual_efficiency
-           << " % efficiency"
+      cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() << " subsets for "
+           << 100.0f * actual_efficiency << " % efficiency"
           << "                  \r" << flush;
      partitions.push_back(partition);
      if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) {
@ -649,7 +561,7 @@ struct partition_action_t : action_t
    while (true) {
      bool repeat = false;
      for (size_t i = 0; i < partitions.size() - 1; i++) {
-        if (partitions[i].size() >= partitions[i+1].size()) {
+        if (partitions[i].size() >= partitions[i + 1].size()) {
          partitions.erase(partitions.begin() + i);
          repeat = true;
          break;
@ -665,8 +577,7 @@ struct partition_action_t : action_t
  }
 };

-struct evaluate_defaults_action_t : action_t
-{
+struct evaluate_defaults_action_t : action_t {
  struct results_entry_t {
    uint16_t product_size;
    size_triple_t default_block_size;
@ -675,30 +586,24 @@ struct evaluate_defaults_action_t : action_t
    float best_pot_gflops;
    float default_efficiency;
  };
-  friend ostream& operator<<(ostream& s, const results_entry_t& entry)
-  {
-    return s
-      << "Product size " << size_triple_t(entry.product_size)
-      << ": default block size " << entry.default_block_size
-      << " -> " << entry.default_gflops
-      << " GFlop/s = " << entry.default_efficiency * 100.0f << " %"
-      << " of best POT block size " << size_triple_t(entry.best_pot_block_size)
-      << " -> " << entry.best_pot_gflops
-      << " GFlop/s" << dec;
+  friend ostream& operator<<(ostream& s, const results_entry_t& entry) {
+    return s << "Product size " << size_triple_t(entry.product_size) << ": default block size "
+             << entry.default_block_size << " -> " << entry.default_gflops
+             << " GFlop/s = " << entry.default_efficiency * 100.0f << " %"
+             << " of best POT block size " << size_triple_t(entry.best_pot_block_size) << " -> "
+             << entry.best_pot_gflops << " GFlop/s" << dec;
  }
  static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
    return e1.default_efficiency < e2.default_efficiency;
  }
  virtual const char* invokation_name() const override { return "evaluate-defaults"; }
-  void show_usage_and_exit() const
-  {
+  void show_usage_and_exit() const {
    cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
    cerr << "checks how well the performance with default sizes compares to the best "
         << "performance measured over all POT sizes." << endl;
    exit(1);
  }
-  virtual void run(const vector<string>& input_filenames) const override
-  {
+  virtual void run(const vector<string>& input_filenames) const override {
    if (input_filenames.size() != 2) {
      show_usage_and_exit();
    }
@ -714,20 +619,17 @@ struct evaluate_defaults_action_t : action_t
    }
    vector<results_entry_t> results;
    vector<results_entry_t> cubic_results;
-    
+
    uint16_t product_size = 0;
    auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin();
    for (auto it_default_sizes = inputfile_default_sizes.entries.begin();
-         it_default_sizes != inputfile_default_sizes.entries.end();
-         ++it_default_sizes)
-    {
+         it_default_sizes != inputfile_default_sizes.entries.end(); ++it_default_sizes) {
      if (it_default_sizes->product_size == product_size) {
        continue;
      }
      product_size = it_default_sizes->product_size;
      while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() &&
-             it_all_pot_sizes->product_size != product_size)
-      {
+             it_all_pot_sizes->product_size != product_size) {
        ++it_all_pot_sizes;
      }
      if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) {
@ -735,10 +637,8 @@ struct evaluate_defaults_action_t : action_t
      }
      uint16_t best_pot_block_size = 0;
      float best_pot_gflops = 0;
-      for (auto it = it_all_pot_sizes;
-           it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
-           ++it)
-      {
+      for (auto it = it_all_pot_sizes; it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
+           ++it) {
        if (it->gflops > best_pot_gflops) {
          best_pot_gflops = it->gflops;
          best_pot_block_size = it->pot_block_size;
@ -766,7 +666,7 @@ struct evaluate_defaults_action_t : action_t
    cout << endl;

    sort(results.begin(), results.end(), lower_efficiency);
-    
+
    const size_t n = min<size_t>(20, results.size());
    cout << n << " worst results:" << endl;
    for (size_t i = 0; i < n; i++) {
@ -781,34 +681,30 @@ struct evaluate_defaults_action_t : action_t
    cout << endl;

    sort(cubic_results.begin(), cubic_results.end(), lower_efficiency);
-    
+
    cout.precision(2);
    vector<float> a = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f};
    for (auto it = a.begin(); it != a.end(); ++it) {
      size_t n = min(results.size() - 1, size_t(*it * results.size()));
      cout << (100.0f * n / (results.size() - 1))
-           << " % of product sizes have default efficiency <= "
-           << 100.0f * results[n].default_efficiency << " %" << endl;
+           << " % of product sizes have default efficiency <= " << 100.0f * results[n].default_efficiency << " %"
+           << endl;
    }
    cout.precision(default_precision);
  }
 };

-
-void show_usage_and_exit(int argc, char* argv[],
-                         const vector<unique_ptr<action_t>>& available_actions)
-{
+void show_usage_and_exit(int argc, char* argv[], const vector<unique_ptr<action_t>>& available_actions) {
  cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
  cerr << "available actions:" << endl;
  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
    cerr << "  " << (*it)->invokation_name() << endl;
-  } 
+  }
  cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl;
  exit(1);
 }

-int main(int argc, char* argv[])
-{
+int main(int argc, char* argv[]) {
  cout.precision(default_precision);
  cerr.precision(default_precision);

--- a/bench/basicbenchmark.cpp
+++ b/bench/basicbenchmark.cpp
@ -3,32 +3,31 @@
 #include "BenchUtil.h"
 #include "basicbenchmark.h"

-int main(int argc, char *argv[])
-{
+int main(int argc, char *argv[]) {
  DISABLE_SSE_EXCEPTIONS();

-  // this is the list of matrix type and size we want to bench:
-  // ((suffix) (matrix size) (number of iterations))
-  #define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000))
-//   #define MODES ((Xd)(20)(10000))
+// this is the list of matrix type and size we want to bench:
+// ((suffix) (matrix size) (number of iterations))
+#define MODES ((3d)(3)(4000000))((4d)(4)(1000000))((Xd)(4)(1000000))((Xd)(20)(10000))
+  //   #define MODES ((Xd)(20)(10000))

-  #define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \
-    << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \
-    << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "   /   "
+#define _GENERATE_HEADER(R, ARG, EL)           \
+  << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) \
+                        << "-"                 \
+                        << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "   /   "

-  std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl;
+  std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES) << endl;

  const int tries = 10;

-  #define _RUN_BENCH(R,ARG,EL) \
-    std::cout << ARG( \
-      BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\
-         BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \
-    << "   ";
+#define _RUN_BENCH(R, ARG, EL)                                                                                      \
+  std::cout << ARG(BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL))(BOOST_PP_SEQ_ELEM(1, EL), BOOST_PP_SEQ_ELEM(1, EL)), \
+                   BOOST_PP_SEQ_ELEM(2, EL), tries)                                                                 \
+            << "   ";

-  BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES );
+  BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES);
  std::cout << endl;
-  BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES );
+  BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES);
  std::cout << endl;

  return 0;
--- a/bench/basicbenchmark.h
+++ b/bench/basicbenchmark.h
@ -2,55 +2,46 @@
 #ifndef EIGEN_BENCH_BASICBENCH_H
 #define EIGEN_BENCH_BASICBENCH_H

-enum {LazyEval, EarlyEval, OmpEval};
+enum { LazyEval, EarlyEval, OmpEval };

-template<int Mode, typename MatrixType>
+template <int Mode, typename MatrixType>
 void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline));

-template<int Mode, typename MatrixType>
-void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations)
-{
-  for(int a = 0; a < iterations; a++)
-  {
-    if (Mode==LazyEval)
-    {
+template <int Mode, typename MatrixType>
+void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) {
+  for (int a = 0; a < iterations; a++) {
+    if (Mode == LazyEval) {
      asm("#begin_bench_loop LazyEval");
-      if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
+      if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
      m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
-    }
-    else if (Mode==OmpEval)
-    {
+    } else if (Mode == OmpEval) {
      asm("#begin_bench_loop OmpEval");
-      if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
+      if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
      m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
-    }
-    else
-    {
+    } else {
      asm("#begin_bench_loop EarlyEval");
-      if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
+      if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
      m = I + 0.00005 * (m + m * m);
    }
    asm("#end_bench_loop");
  }
 }

-template<int Mode, typename MatrixType>
+template <int Mode, typename MatrixType>
 double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline));

-template<int Mode, typename MatrixType>
-double benchBasic(const MatrixType& mat, int iterations, int tries)
-{
+template <int Mode, typename MatrixType>
+double benchBasic(const MatrixType& mat, int iterations, int tries) {
  const int rows = mat.rows();
  const int cols = mat.cols();

-  MatrixType I(rows,cols);
-  MatrixType m(rows,cols);
+  MatrixType I(rows, cols);
+  MatrixType m(rows, cols);

  initMatrix_identity(I);

  Eigen::BenchTimer timer;
-  for(uint t=0; t<tries; ++t)
-  {
+  for (uint t = 0; t < tries; ++t) {
    initMatrix_random(m);
    timer.start();
    benchBasic_loop<Mode>(I, m, iterations);
@ -60,4 +51,4 @@ double benchBasic(const MatrixType& mat, int iterations, int tries)
  return timer.value();
 };

-#endif // EIGEN_BENCH_BASICBENCH_H
+#endif  // EIGEN_BENCH_BASICBENCH_H
--- a/bench/benchBlasGemm.cpp
+++ b/bench/benchBlasGemm.cpp
@ -25,59 +25,47 @@ typedef double Scalar;
 #define CBLAS_GEMM cblas_dgemm
 #endif

-
-typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
+typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MyMatrix;
 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
 void check_product(int M, int N, int K);
 void check_product(void);

-int main(int argc, char *argv[])
-{
-  // disable SSE exceptions
-  #ifdef __GNUC__
+int main(int argc, char* argv[]) {
+// disable SSE exceptions
+#ifdef __GNUC__
  {
    int aux;
-    asm(
-    "stmxcsr   %[aux]           \n\t"
-    "orl       $32832, %[aux]   \n\t"
-    "ldmxcsr   %[aux]           \n\t"
-    : : [aux] "m" (aux));
+    asm("stmxcsr   %[aux]           \n\t"
+        "orl       $32832, %[aux]   \n\t"
+        "ldmxcsr   %[aux]           \n\t"
+        :
+        : [aux] "m"(aux));
  }
-  #endif
+#endif

-  int nbtries=1, nbloops=1, M, N, K;
+  int nbtries = 1, nbloops = 1, M, N, K;

-  if (argc==2)
-  {
-    if (std::string(argv[1])=="check")
+  if (argc == 2) {
+    if (std::string(argv[1]) == "check")
      check_product();
    else
      M = N = K = atoi(argv[1]);
-  }
-  else if ((argc==3) && (std::string(argv[1])=="auto"))
-  {
+  } else if ((argc == 3) && (std::string(argv[1]) == "auto")) {
    M = N = K = atoi(argv[2]);
-    nbloops = 1000000000/(M*M*M);
-    if (nbloops<1)
-      nbloops = 1;
+    nbloops = 1000000000 / (M * M * M);
+    if (nbloops < 1) nbloops = 1;
    nbtries = 6;
-  }
-  else if (argc==4)
-  {
+  } else if (argc == 4) {
    M = N = K = atoi(argv[1]);
    nbloops = atoi(argv[2]);
    nbtries = atoi(argv[3]);
-  }
-  else if (argc==6)
-  {
+  } else if (argc == 6) {
    M = atoi(argv[1]);
    N = atoi(argv[2]);
    K = atoi(argv[3]);
    nbloops = atoi(argv[4]);
    nbtries = atoi(argv[5]);
-  }
-  else
-  {
+  } else {
    std::cout << "Usage: " << argv[0] << " size  \n";
    std::cout << "Usage: " << argv[0] << " auto size\n";
    std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
@ -95,14 +83,13 @@ int main(int argc, char *argv[])

  double nbmad = double(M) * double(N) * double(K) * double(nbloops);

-  if (!(std::string(argv[1])=="auto"))
-    std::cout << M << " x " << N << " x " << K << "\n";
+  if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n";

  Scalar alpha, beta;
-  MyMatrix ma(M,K), mb(K,N), mc(M,N);
-  ma = MyMatrix::Random(M,K);
-  mb = MyMatrix::Random(K,N);
-  mc = MyMatrix::Random(M,N);
+  MyMatrix ma(M, K), mb(K, N), mc(M, N);
+  ma = MyMatrix::Random(M, K);
+  mb = MyMatrix::Random(K, N);
+  mc = MyMatrix::Random(M, N);

  Eigen::BenchTimer timer;

@ -112,108 +99,101 @@ int main(int argc, char *argv[])

  // bench cblas
  // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
-  if (!(std::string(argv[1])=="auto"))
-  {
+  if (!(std::string(argv[1]) == "auto")) {
    timer.reset();
-    for (uint k=0 ; k<nbtries ; ++k)
-    {
-        timer.start();
-        for (uint j=0 ; j<nbloops ; ++j)
-              #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
-              CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
-              #else
-              CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
-              #endif
-        timer.stop();
+    for (uint k = 0; k < nbtries; ++k) {
+      timer.start();
+      for (uint j = 0; j < nbloops; ++j)
+#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+        CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta,
+                   mc.data(), N);
+#else
+        CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta,
+                   mc.data(), M);
+#endif
+      timer.stop();
    }
-    if (!(std::string(argv[1])=="auto"))
-      std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
+    if (!(std::string(argv[1]) == "auto"))
+      std::cout << "cblas: " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
    else
-        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
+      std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
  }

  // clear
-  ma = MyMatrix::Random(M,K);
-  mb = MyMatrix::Random(K,N);
-  mc = MyMatrix::Random(M,N);
+  ma = MyMatrix::Random(M, K);
+  mb = MyMatrix::Random(K, N);
+  mc = MyMatrix::Random(M, N);

  // eigen
-//   if (!(std::string(argv[1])=="auto"))
+  //   if (!(std::string(argv[1])=="auto"))
  {
-      timer.reset();
-      for (uint k=0 ; k<nbtries ; ++k)
-      {
-          timer.start();
-          bench_eigengemm(mc, ma, mb, nbloops);
-          timer.stop();
-      }
-      if (!(std::string(argv[1])=="auto"))
-        std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
-      else
-        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
+    timer.reset();
+    for (uint k = 0; k < nbtries; ++k) {
+      timer.start();
+      bench_eigengemm(mc, ma, mb, nbloops);
+      timer.stop();
+    }
+    if (!(std::string(argv[1]) == "auto"))
+      std::cout << "eigen : " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
+    else
+      std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
  }

  std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
  std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
-  

  return 0;
 }

 using namespace Eigen;

-void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
-{
-  for (uint j=0 ; j<nbloops ; ++j)
-      mc.noalias() += ma * mb;
+void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) {
+  for (uint j = 0; j < nbloops; ++j) mc.noalias() += ma * mb;
 }

-#define MYVERIFY(A,M) if (!(A)) { \
+#define MYVERIFY(A, M)                  \
+  if (!(A)) {                           \
    std::cout << "FAIL: " << M << "\n"; \
  }
-void check_product(int M, int N, int K)
-{
-  MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
-  ma = MyMatrix::Random(M,K);
-  mb = MyMatrix::Random(K,N);
+void check_product(int M, int N, int K) {
+  MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N), mref(M, N);
+  ma = MyMatrix::Random(M, K);
+  mb = MyMatrix::Random(K, N);
  maT = ma.transpose();
  mbT = mb.transpose();
-  mc = MyMatrix::Random(M,N);
+  mc = MyMatrix::Random(M, N);

  MyMatrix::Scalar eps = 1e-4;

  meigen = mref = mc;
  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
  meigen += ma * mb;
-  MYVERIFY(meigen.isApprox(mref, eps),". * .");
+  MYVERIFY(meigen.isApprox(mref, eps), ". * .");

  meigen = mref = mc;
  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
  meigen += maT.transpose() * mb;
-  MYVERIFY(meigen.isApprox(mref, eps),"T * .");
+  MYVERIFY(meigen.isApprox(mref, eps), "T * .");

  meigen = mref = mc;
  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
  meigen += (maT.transpose()) * (mbT.transpose());
-  MYVERIFY(meigen.isApprox(mref, eps),"T * T");
+  MYVERIFY(meigen.isApprox(mref, eps), "T * T");

  meigen = mref = mc;
  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
  meigen += ma * mbT.transpose();
-  MYVERIFY(meigen.isApprox(mref, eps),". * T");
+  MYVERIFY(meigen.isApprox(mref, eps), ". * T");
 }

-void check_product(void)
-{
+void check_product(void) {
  int M, N, K;
-  for (uint i=0; i<1000; ++i)
-  {
-    M = internal::random<int>(1,64);
-    N = internal::random<int>(1,768);
-    K = internal::random<int>(1,768);
+  for (uint i = 0; i < 1000; ++i) {
+    M = internal::random<int>(1, 64);
+    N = internal::random<int>(1, 768);
+    K = internal::random<int>(1, 768);
    M = (0 + M) * 1;
    std::cout << M << " x " << N << " x " << K << "\n";
    check_product(M, N, K);
  }
 }
-
--- a/bench/benchCholesky.cpp
+++ b/bench/benchCholesky.cpp
@ -25,117 +25,100 @@ using namespace Eigen;
 typedef float Scalar;

 template <typename MatrixType>
-__attribute__ ((noinline)) void benchLLT(const MatrixType& m)
-{
+__attribute__((noinline)) void benchLLT(const MatrixType& m) {
  int rows = m.rows();
  int cols = m.cols();

  double cost = 0;
-  for (int j=0; j<rows; ++j)
-  {
-    int r = std::max(rows - j -1,0);
-    cost += 2*(r*j+r+j);
+  for (int j = 0; j < rows; ++j) {
+    int r = std::max(rows - j - 1, 0);
+    cost += 2 * (r * j + r + j);
  }

-  int repeats = (REPEAT*1000)/(rows*rows);
+  int repeats = (REPEAT * 1000) / (rows * rows);

  typedef typename MatrixType::Scalar Scalar;
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;

-  MatrixType a = MatrixType::Random(rows,cols);
-  SquareMatrixType covMat =  a * a.adjoint();
+  MatrixType a = MatrixType::Random(rows, cols);
+  SquareMatrixType covMat = a * a.adjoint();

  BenchTimer timerNoSqrt, timerSqrt;

  Scalar acc = 0;
-  int r = internal::random<int>(0,covMat.rows()-1);
-  int c = internal::random<int>(0,covMat.cols()-1);
-  for (int t=0; t<TRIES; ++t)
-  {
+  int r = internal::random<int>(0, covMat.rows() - 1);
+  int c = internal::random<int>(0, covMat.cols() - 1);
+  for (int t = 0; t < TRIES; ++t) {
    timerNoSqrt.start();
-    for (int k=0; k<repeats; ++k)
-    {
+    for (int k = 0; k < repeats; ++k) {
      LDLT<SquareMatrixType> cholnosqrt(covMat);
-      acc += cholnosqrt.matrixL().coeff(r,c);
+      acc += cholnosqrt.matrixL().coeff(r, c);
    }
    timerNoSqrt.stop();
  }

-  for (int t=0; t<TRIES; ++t)
-  {
+  for (int t = 0; t < TRIES; ++t) {
    timerSqrt.start();
-    for (int k=0; k<repeats; ++k)
-    {
+    for (int k = 0; k < repeats; ++k) {
      LLT<SquareMatrixType> chol(covMat);
-      acc += chol.matrixL().coeff(r,c);
+      acc += chol.matrixL().coeff(r, c);
    }
    timerSqrt.stop();
  }

-  if (MatrixType::RowsAtCompileTime==Dynamic)
+  if (MatrixType::RowsAtCompileTime == Dynamic)
    std::cout << "dyn   ";
  else
    std::cout << "fixed ";
-  std::cout << covMat.rows() << " \t"
-            << (timerNoSqrt.best()) / repeats << "s "
-            << "(" << 1e-9 * cost*repeats/timerNoSqrt.best() << " GFLOPS)\t"
-            << (timerSqrt.best()) / repeats << "s "
-            << "(" << 1e-9 * cost*repeats/timerSqrt.best() << " GFLOPS)\n";
+  std::cout << covMat.rows() << " \t" << (timerNoSqrt.best()) / repeats << "s "
+            << "(" << 1e-9 * cost * repeats / timerNoSqrt.best() << " GFLOPS)\t" << (timerSqrt.best()) / repeats << "s "
+            << "(" << 1e-9 * cost * repeats / timerSqrt.best() << " GFLOPS)\n";

-
-  #ifdef BENCH_GSL
-  if (MatrixType::RowsAtCompileTime==Dynamic)
-  {
+#ifdef BENCH_GSL
+  if (MatrixType::RowsAtCompileTime == Dynamic) {
    timerSqrt.reset();

-    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
-    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
+    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols());
+    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols());

    eiToGsl(covMat, &gslCovMat);
-    for (int t=0; t<TRIES; ++t)
-    {
+    for (int t = 0; t < TRIES; ++t) {
      timerSqrt.start();
-      for (int k=0; k<repeats; ++k)
-      {
-        gsl_matrix_memcpy(gslCopy,gslCovMat);
+      for (int k = 0; k < repeats; ++k) {
+        gsl_matrix_memcpy(gslCopy, gslCovMat);
        gsl_linalg_cholesky_decomp(gslCopy);
-        acc += gsl_matrix_get(gslCopy,r,c);
+        acc += gsl_matrix_get(gslCopy, r, c);
      }
      timerSqrt.stop();
    }

-    std::cout << " | \t"
-              << timerSqrt.value() * REPEAT / repeats << "s";
+    std::cout << " | \t" << timerSqrt.value() * REPEAT / repeats << "s";

    gsl_matrix_free(gslCovMat);
  }
-  #endif
+#endif
  std::cout << "\n";
  // make sure the compiler does not optimize too much
-  if (acc==123)
-    std::cout << acc;
+  if (acc == 123) std::cout << acc;
 }

-int main(int argc, char* argv[])
-{
-  const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,1500,0};
+int main(int argc, char* argv[]) {
+  const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 1500, 0};
  std::cout << "size            LDLT                            LLT";
-//   #ifdef BENCH_GSL
-//   std::cout << "       GSL (standard + double + ATLAS)  ";
-//   #endif
+  //   #ifdef BENCH_GSL
+  //   std::cout << "       GSL (standard + double + ATLAS)  ";
+  //   #endif
  std::cout << "\n";
-  for (int i=0; dynsizes[i]>0; ++i)
-    benchLLT(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
+  for (int i = 0; dynsizes[i] > 0; ++i) benchLLT(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));

-  benchLLT(Matrix<Scalar,2,2>());
-  benchLLT(Matrix<Scalar,3,3>());
-  benchLLT(Matrix<Scalar,4,4>());
-  benchLLT(Matrix<Scalar,5,5>());
-  benchLLT(Matrix<Scalar,6,6>());
-  benchLLT(Matrix<Scalar,7,7>());
-  benchLLT(Matrix<Scalar,8,8>());
-  benchLLT(Matrix<Scalar,12,12>());
-  benchLLT(Matrix<Scalar,16,16>());
+  benchLLT(Matrix<Scalar, 2, 2>());
+  benchLLT(Matrix<Scalar, 3, 3>());
+  benchLLT(Matrix<Scalar, 4, 4>());
+  benchLLT(Matrix<Scalar, 5, 5>());
+  benchLLT(Matrix<Scalar, 6, 6>());
+  benchLLT(Matrix<Scalar, 7, 7>());
+  benchLLT(Matrix<Scalar, 8, 8>());
+  benchLLT(Matrix<Scalar, 12, 12>());
+  benchLLT(Matrix<Scalar, 16, 16>());
  return 0;
 }
-
--- a/bench/benchEigenSolver.cpp
+++ b/bench/benchEigenSolver.cpp
@ -31,34 +31,31 @@ using namespace Eigen;
 typedef SCALAR Scalar;

 template <typename MatrixType>
-__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
-{
+__attribute__((noinline)) void benchEigenSolver(const MatrixType& m) {
  int rows = m.rows();
  int cols = m.cols();

-  int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(rows))));
+  int stdRepeats = std::max(1, int((REPEAT * 1000) / (rows * rows * sqrt(rows))));
  int saRepeats = stdRepeats * 4;

  typedef typename MatrixType::Scalar Scalar;
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;

-  MatrixType a = MatrixType::Random(rows,cols);
-  SquareMatrixType covMat =  a * a.adjoint();
+  MatrixType a = MatrixType::Random(rows, cols);
+  SquareMatrixType covMat = a * a.adjoint();

  BenchTimer timerSa, timerStd;

  Scalar acc = 0;
-  int r = internal::random<int>(0,covMat.rows()-1);
-  int c = internal::random<int>(0,covMat.cols()-1);
+  int r = internal::random<int>(0, covMat.rows() - 1);
+  int c = internal::random<int>(0, covMat.cols() - 1);
  {
    SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
-    for (int t=0; t<TRIES; ++t)
-    {
+    for (int t = 0; t < TRIES; ++t) {
      timerSa.start();
-      for (int k=0; k<saRepeats; ++k)
-      {
+      for (int k = 0; k < saRepeats; ++k) {
        ei.compute(covMat);
-        acc += ei.eigenvectors().coeff(r,c);
+        acc += ei.eigenvectors().coeff(r, c);
      }
      timerSa.stop();
    }
@ -66,107 +63,94 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)

  {
    EigenSolver<SquareMatrixType> ei(covMat);
-    for (int t=0; t<TRIES; ++t)
-    {
+    for (int t = 0; t < TRIES; ++t) {
      timerStd.start();
-      for (int k=0; k<stdRepeats; ++k)
-      {
+      for (int k = 0; k < stdRepeats; ++k) {
        ei.compute(covMat);
-        acc += ei.eigenvectors().coeff(r,c);
+        acc += ei.eigenvectors().coeff(r, c);
      }
      timerStd.stop();
    }
  }

-  if (MatrixType::RowsAtCompileTime==Dynamic)
+  if (MatrixType::RowsAtCompileTime == Dynamic)
    std::cout << "dyn   ";
  else
    std::cout << "fixed ";
-  std::cout << covMat.rows() << " \t"
-            << timerSa.value() * REPEAT / saRepeats << "s \t"
+  std::cout << covMat.rows() << " \t" << timerSa.value() * REPEAT / saRepeats << "s \t"
            << timerStd.value() * REPEAT / stdRepeats << "s";

-  #ifdef BENCH_GMM
-  if (MatrixType::RowsAtCompileTime==Dynamic)
-  {
+#ifdef BENCH_GMM
+  if (MatrixType::RowsAtCompileTime == Dynamic) {
    timerSa.reset();
    timerStd.reset();

-    gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(),covMat.cols());
-    gmm::dense_matrix<Scalar> eigvect(covMat.rows(),covMat.cols());
+    gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(), covMat.cols());
+    gmm::dense_matrix<Scalar> eigvect(covMat.rows(), covMat.cols());
    std::vector<Scalar> eigval(covMat.rows());
    eiToGmm(covMat, gmmCovMat);
-    for (int t=0; t<TRIES; ++t)
-    {
+    for (int t = 0; t < TRIES; ++t) {
      timerSa.start();
-      for (int k=0; k<saRepeats; ++k)
-      {
+      for (int k = 0; k < saRepeats; ++k) {
        gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
-        acc += eigvect(r,c);
+        acc += eigvect(r, c);
      }
      timerSa.stop();
    }
    // the non-selfadjoint solver does not compute the eigen vectors
-//     for (int t=0; t<TRIES; ++t)
-//     {
-//       timerStd.start();
-//       for (int k=0; k<stdRepeats; ++k)
-//       {
-//         gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
-//         acc += eigvect(r,c);
-//       }
-//       timerStd.stop();
-//     }
+    //     for (int t=0; t<TRIES; ++t)
+    //     {
+    //       timerStd.start();
+    //       for (int k=0; k<stdRepeats; ++k)
+    //       {
+    //         gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
+    //         acc += eigvect(r,c);
+    //       }
+    //       timerStd.stop();
+    //     }

-    std::cout << " | \t"
-              << timerSa.value() * REPEAT / saRepeats << "s"
+    std::cout << " | \t" << timerSa.value() * REPEAT / saRepeats << "s"
              << /*timerStd.value() * REPEAT / stdRepeats << "s"*/ "   na   ";
  }
-  #endif
+#endif

-  #ifdef BENCH_GSL
-  if (MatrixType::RowsAtCompileTime==Dynamic)
-  {
+#ifdef BENCH_GSL
+  if (MatrixType::RowsAtCompileTime == Dynamic) {
    timerSa.reset();
    timerStd.reset();

-    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
-    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
-    gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(),covMat.cols());
-    gsl_vector* eigval  = gsl_vector_alloc(covMat.rows());
+    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols());
+    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols());
+    gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(), covMat.cols());
+    gsl_vector* eigval = gsl_vector_alloc(covMat.rows());
    gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows());
-    
-    gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(),covMat.cols());
-    gsl_vector_complex* eigvalz  = gsl_vector_complex_alloc(covMat.rows());
+
+    gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(), covMat.cols());
+    gsl_vector_complex* eigvalz = gsl_vector_complex_alloc(covMat.rows());
    gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());
-    
+
    eiToGsl(covMat, &gslCovMat);
-    for (int t=0; t<TRIES; ++t)
-    {
+    for (int t = 0; t < TRIES; ++t) {
      timerSa.start();
-      for (int k=0; k<saRepeats; ++k)
-      {
-        gsl_matrix_memcpy(gslCopy,gslCovMat);
+      for (int k = 0; k < saRepeats; ++k) {
+        gsl_matrix_memcpy(gslCopy, gslCovMat);
        gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
-        acc += gsl_matrix_get(eigvect,r,c);
+        acc += gsl_matrix_get(eigvect, r, c);
      }
      timerSa.stop();
    }
-    for (int t=0; t<TRIES; ++t)
-    {
+    for (int t = 0; t < TRIES; ++t) {
      timerStd.start();
-      for (int k=0; k<stdRepeats; ++k)
-      {
-        gsl_matrix_memcpy(gslCopy,gslCovMat);
+      for (int k = 0; k < stdRepeats; ++k) {
+        gsl_matrix_memcpy(gslCopy, gslCovMat);
        gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
-        acc += GSL_REAL(gsl_matrix_complex_get(eigvectz,r,c));
+        acc += GSL_REAL(gsl_matrix_complex_get(eigvectz, r, c));
      }
      timerStd.stop();
    }

-    std::cout << " | \t"
-              << timerSa.value() * REPEAT / saRepeats << "s \t"
-              << timerStd.value() * REPEAT / stdRepeats << "s";
+    std::cout << " | \t" << timerSa.value() * REPEAT / saRepeats << "s \t" << timerStd.value() * REPEAT / stdRepeats
+              << "s";

    gsl_matrix_free(gslCovMat);
    gsl_vector_free(gslCopy);
@ -177,36 +161,32 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
    gsl_eigen_symmv_free(eisymm);
    gsl_eigen_nonsymmv_free(einonsymm);
  }
-  #endif
+#endif

  std::cout << "\n";
-  
+
  // make sure the compiler does not optimize too much
-  if (acc==123)
-    std::cout << acc;
+  if (acc == 123) std::cout << acc;
 }

-int main(int argc, char* argv[])
-{
-  const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};
+int main(int argc, char* argv[]) {
+  const int dynsizes[] = {4, 6, 8, 12, 16, 24, 32, 64, 128, 256, 512, 0};
  std::cout << "size            selfadjoint       generic";
-  #ifdef BENCH_GMM
+#ifdef BENCH_GMM
  std::cout << "        GMM++          ";
-  #endif
-  #ifdef BENCH_GSL
+#endif
+#ifdef BENCH_GSL
  std::cout << "       GSL (double + ATLAS)  ";
-  #endif
+#endif
  std::cout << "\n";
-  for (uint i=0; dynsizes[i]>0; ++i)
-    benchEigenSolver(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
+  for (uint i = 0; dynsizes[i] > 0; ++i) benchEigenSolver(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));

-  benchEigenSolver(Matrix<Scalar,2,2>());
-  benchEigenSolver(Matrix<Scalar,3,3>());
-  benchEigenSolver(Matrix<Scalar,4,4>());
-  benchEigenSolver(Matrix<Scalar,6,6>());
-  benchEigenSolver(Matrix<Scalar,8,8>());
-  benchEigenSolver(Matrix<Scalar,12,12>());
-  benchEigenSolver(Matrix<Scalar,16,16>());
+  benchEigenSolver(Matrix<Scalar, 2, 2>());
+  benchEigenSolver(Matrix<Scalar, 3, 3>());
+  benchEigenSolver(Matrix<Scalar, 4, 4>());
+  benchEigenSolver(Matrix<Scalar, 6, 6>());
+  benchEigenSolver(Matrix<Scalar, 8, 8>());
+  benchEigenSolver(Matrix<Scalar, 12, 12>());
+  benchEigenSolver(Matrix<Scalar, 16, 16>());
  return 0;
 }
-
--- a/bench/benchFFT.cpp
+++ b/bench/benchFFT.cpp
@ -19,13 +19,21 @@
 using namespace Eigen;
 using namespace std;

-
 template <typename T>
 string nameof();

-template <> string nameof<float>() {return "float";}
-template <> string nameof<double>() {return "double";}
-template <> string nameof<long double>() {return "long double";}
+template <>
+string nameof<float>() {
+  return "float";
+}
+template <>
+string nameof<double>() {
+  return "double";
+}
+template <>
+string nameof<long double>() {
+  return "long double";
+}

 #ifndef TYPE
 #define TYPE float
@ -41,75 +49,69 @@ template <> string nameof<long double>() {return "long double";}
 using namespace Eigen;

 template <typename T>
-void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false)
-{
-    typedef typename NumTraits<T>::Real Scalar;
-    typedef typename std::complex<Scalar> Complex;
-    int nits = NDATA/nfft;
-    vector<T> inbuf(nfft);
-    vector<Complex > outbuf(nfft);
-    FFT< Scalar > fft;
+void bench(int nfft, bool fwd, bool unscaled = false, bool halfspec = false) {
+  typedef typename NumTraits<T>::Real Scalar;
+  typedef typename std::complex<Scalar> Complex;
+  int nits = NDATA / nfft;
+  vector<T> inbuf(nfft);
+  vector<Complex> outbuf(nfft);
+  FFT<Scalar> fft;

-    if (unscaled) {
-        fft.SetFlag(fft.Unscaled);
-        cout << "unscaled ";
-    }
-    if (halfspec) {
-        fft.SetFlag(fft.HalfSpectrum);
-        cout << "halfspec ";
-    }
-
-
-    std::fill(inbuf.begin(),inbuf.end(),0);
-    fft.fwd( outbuf , inbuf);
-
-    BenchTimer timer;
-    timer.reset();
-    for (int k=0;k<8;++k) {
-        timer.start();
-        if (fwd)
-            for(int i = 0; i < nits; i++)
-                fft.fwd( outbuf , inbuf);
-        else
-            for(int i = 0; i < nits; i++)
-                fft.inv(inbuf,outbuf);
-        timer.stop();
-    }
-
-    cout << nameof<Scalar>() << " ";
-    double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits );
-    if ( NumTraits<T>::IsComplex ) {
-        cout << "complex";
-    }else{
-        cout << "real   ";
-        mflops /= 2;
-    }
+  if (unscaled) {
+    fft.SetFlag(fft.Unscaled);
+    cout << "unscaled ";
+  }
+  if (halfspec) {
+    fft.SetFlag(fft.HalfSpectrum);
+    cout << "halfspec ";
+  }

+  std::fill(inbuf.begin(), inbuf.end(), 0);
+  fft.fwd(outbuf, inbuf);

+  BenchTimer timer;
+  timer.reset();
+  for (int k = 0; k < 8; ++k) {
+    timer.start();
    if (fwd)
-        cout << " fwd";
+      for (int i = 0; i < nits; i++) fft.fwd(outbuf, inbuf);
    else
-        cout << " inv";
+      for (int i = 0; i < nits; i++) fft.inv(inbuf, outbuf);
+    timer.stop();
+  }

-    cout << " NFFT=" << nfft << "  " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s  " << mflops << "MFLOPS\n";
+  cout << nameof<Scalar>() << " ";
+  double mflops = 5. * nfft * log2((double)nfft) / (1e6 * timer.value() / (double)nits);
+  if (NumTraits<T>::IsComplex) {
+    cout << "complex";
+  } else {
+    cout << "real   ";
+    mflops /= 2;
+  }
+
+  if (fwd)
+    cout << " fwd";
+  else
+    cout << " inv";
+
+  cout << " NFFT=" << nfft << "  " << (double(1e-6 * nfft * nits) / timer.value()) << " MS/s  " << mflops << "MFLOPS\n";
 }

-int main(int argc,char ** argv)
-{
-    bench<complex<float> >(NFFT,true);
-    bench<complex<float> >(NFFT,false);
-    bench<float>(NFFT,true);
-    bench<float>(NFFT,false);
-    bench<float>(NFFT,false,true);
-    bench<float>(NFFT,false,true,true);
+int main(int argc, char** argv) {
+  bench<complex<float> >(NFFT, true);
+  bench<complex<float> >(NFFT, false);
+  bench<float>(NFFT, true);
+  bench<float>(NFFT, false);
+  bench<float>(NFFT, false, true);
+  bench<float>(NFFT, false, true, true);

-    bench<complex<double> >(NFFT,true);
-    bench<complex<double> >(NFFT,false);
-    bench<double>(NFFT,true);
-    bench<double>(NFFT,false);
-    bench<complex<long double> >(NFFT,true);
-    bench<complex<long double> >(NFFT,false);
-    bench<long double>(NFFT,true);
-    bench<long double>(NFFT,false);
-    return 0;
+  bench<complex<double> >(NFFT, true);
+  bench<complex<double> >(NFFT, false);
+  bench<double>(NFFT, true);
+  bench<double>(NFFT, false);
+  bench<complex<long double> >(NFFT, true);
+  bench<complex<long double> >(NFFT, false);
+  bench<long double>(NFFT, true);
+  bench<long double>(NFFT, false);
+  return 0;
 }
--- a/bench/benchGeometry.cpp
+++ b/bench/benchGeometry.cpp
@ -11,124 +11,110 @@ using namespace std;
 #define REPEAT 1000000
 #endif

-enum func_opt
-{
-    TV,
-    TMATV,
-    TMATVMAT,
+enum func_opt {
+  TV,
+  TMATV,
+  TMATVMAT,
 };

-
 template <class res, class arg1, class arg2, int opt>
 struct func;

 template <class res, class arg1, class arg2>
-struct func<res, arg1, arg2, TV>
-{
-    static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
-    {
-	asm ("");
-	return a1 * a2;
-    }
+struct func<res, arg1, arg2, TV> {
+  static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
+    asm("");
+    return a1 * a2;
+  }
 };

 template <class res, class arg1, class arg2>
-struct func<res, arg1, arg2, TMATV>
-{
-    static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
-    {
-	asm ("");
-	return a1.matrix() * a2;
-    }
+struct func<res, arg1, arg2, TMATV> {
+  static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
+    asm("");
+    return a1.matrix() * a2;
+  }
 };

 template <class res, class arg1, class arg2>
-struct func<res, arg1, arg2, TMATVMAT>
-{
-    static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
-    {
-	asm ("");
-	return res(a1.matrix() * a2.matrix());
-    }
+struct func<res, arg1, arg2, TMATVMAT> {
+  static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
+    asm("");
+    return res(a1.matrix() * a2.matrix());
+  }
 };

 template <class func, class arg1, class arg2>
-struct test_transform
-{
-    static void run()
-    {
-	arg1 a1;
-	a1.setIdentity();
-	arg2 a2;
-	a2.setIdentity();
+struct test_transform {
+  static void run() {
+    arg1 a1;
+    a1.setIdentity();
+    arg2 a2;
+    a2.setIdentity();

-	BenchTimer timer;
-	timer.reset();
-	for (int k=0; k<10; ++k)
-	{
-	    timer.start();
-	    for (int k=0; k<REPEAT; ++k)
-		a2 = func::run( a1, a2 );
-	    timer.stop();
-	}
-	cout << setprecision(4) << fixed << timer.value() << "s  " << endl;;
+    BenchTimer timer;
+    timer.reset();
+    for (int k = 0; k < 10; ++k) {
+      timer.start();
+      for (int k = 0; k < REPEAT; ++k) a2 = func::run(a1, a2);
+      timer.stop();
    }
+    cout << setprecision(4) << fixed << timer.value() << "s  " << endl;
+    ;
+  }
 };

+#define run_vec(op, scalar, mode, option, vsize)                                   \
+  std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize " "; \
+  {                                                                                \
+    typedef Transform<scalar, 3, mode, option> Trans;                              \
+    typedef Matrix<scalar, vsize, 1, option> Vec;                                  \
+    typedef func<Vec, Trans, Vec, op> Func;                                        \
+    test_transform<Func, Trans, Vec>::run();                                       \
+  }

-#define run_vec( op, scalar, mode, option, vsize ) \
-    std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize " "; \
-    {\
-	typedef Transform<scalar, 3, mode, option> Trans;\
-	typedef Matrix<scalar, vsize, 1, option> Vec;\
-	typedef func<Vec,Trans,Vec,op> Func;\
-	test_transform< Func, Trans, Vec >::run();\
-    }
+#define run_trans(op, scalar, mode, option)                            \
+  std::cout << #scalar << "\t " << #mode << "\t " << #option << "   "; \
+  {                                                                    \
+    typedef Transform<scalar, 3, mode, option> Trans;                  \
+    typedef func<Trans, Trans, Trans, op> Func;                        \
+    test_transform<Func, Trans, Trans>::run();                         \
+  }

-#define run_trans( op, scalar, mode, option ) \
-    std::cout << #scalar << "\t " << #mode << "\t " << #option << "   "; \
-    {\
-	typedef Transform<scalar, 3, mode, option> Trans;\
-	typedef func<Trans,Trans,Trans,op> Func;\
-	test_transform< Func, Trans, Trans >::run();\
-    }
+int main(int argc, char* argv[]) {
+  cout << "vec = trans * vec" << endl;
+  run_vec(TV, float, Isometry, AutoAlign, 3);
+  run_vec(TV, float, Isometry, DontAlign, 3);
+  run_vec(TV, float, Isometry, AutoAlign, 4);
+  run_vec(TV, float, Isometry, DontAlign, 4);
+  run_vec(TV, float, Projective, AutoAlign, 4);
+  run_vec(TV, float, Projective, DontAlign, 4);
+  run_vec(TV, double, Isometry, AutoAlign, 3);
+  run_vec(TV, double, Isometry, DontAlign, 3);
+  run_vec(TV, double, Isometry, AutoAlign, 4);
+  run_vec(TV, double, Isometry, DontAlign, 4);
+  run_vec(TV, double, Projective, AutoAlign, 4);
+  run_vec(TV, double, Projective, DontAlign, 4);

-int main(int argc, char* argv[])
-{
-    cout << "vec = trans * vec" << endl;
-    run_vec(TV, float,  Isometry, AutoAlign, 3);
-    run_vec(TV, float,  Isometry, DontAlign, 3);
-    run_vec(TV, float,  Isometry, AutoAlign, 4);
-    run_vec(TV, float,  Isometry, DontAlign, 4);
-    run_vec(TV, float,  Projective, AutoAlign, 4);
-    run_vec(TV, float,  Projective, DontAlign, 4);
-    run_vec(TV, double, Isometry, AutoAlign, 3);
-    run_vec(TV, double, Isometry, DontAlign, 3);
-    run_vec(TV, double, Isometry, AutoAlign, 4);
-    run_vec(TV, double, Isometry, DontAlign, 4);
-    run_vec(TV, double, Projective, AutoAlign, 4);
-    run_vec(TV, double, Projective, DontAlign, 4);
+  cout << "vec = trans.matrix() * vec" << endl;
+  run_vec(TMATV, float, Isometry, AutoAlign, 4);
+  run_vec(TMATV, float, Isometry, DontAlign, 4);
+  run_vec(TMATV, double, Isometry, AutoAlign, 4);
+  run_vec(TMATV, double, Isometry, DontAlign, 4);

-    cout << "vec = trans.matrix() * vec" << endl;
-    run_vec(TMATV, float,  Isometry, AutoAlign, 4);
-    run_vec(TMATV, float,  Isometry, DontAlign, 4);
-    run_vec(TMATV, double, Isometry, AutoAlign, 4);
-    run_vec(TMATV, double, Isometry, DontAlign, 4);
+  cout << "trans = trans1 * trans" << endl;
+  run_trans(TV, float, Isometry, AutoAlign);
+  run_trans(TV, float, Isometry, DontAlign);
+  run_trans(TV, double, Isometry, AutoAlign);
+  run_trans(TV, double, Isometry, DontAlign);
+  run_trans(TV, float, Projective, AutoAlign);
+  run_trans(TV, float, Projective, DontAlign);
+  run_trans(TV, double, Projective, AutoAlign);
+  run_trans(TV, double, Projective, DontAlign);

-    cout << "trans = trans1 * trans" << endl;
-    run_trans(TV, float,  Isometry, AutoAlign);
-    run_trans(TV, float,  Isometry, DontAlign);
-    run_trans(TV, double, Isometry, AutoAlign);
-    run_trans(TV, double, Isometry, DontAlign);
-    run_trans(TV, float,  Projective, AutoAlign);
-    run_trans(TV, float,  Projective, DontAlign);
-    run_trans(TV, double, Projective, AutoAlign);
-    run_trans(TV, double, Projective, DontAlign);
-
-    cout << "trans = trans1.matrix() * trans.matrix()" << endl;
-    run_trans(TMATVMAT, float,  Isometry, AutoAlign);
-    run_trans(TMATVMAT, float,  Isometry, DontAlign);
-    run_trans(TMATVMAT, double, Isometry, AutoAlign);
-    run_trans(TMATVMAT, double, Isometry, DontAlign);
+  cout << "trans = trans1.matrix() * trans.matrix()" << endl;
+  run_trans(TMATVMAT, float, Isometry, AutoAlign);
+  run_trans(TMATVMAT, float, Isometry, DontAlign);
+  run_trans(TMATVMAT, double, Isometry, AutoAlign);
+  run_trans(TMATVMAT, double, Isometry, DontAlign);
 }
-
--- a/bench/benchVecAdd.cpp
+++ b/bench/benchVecAdd.cpp
@ -14,122 +14,118 @@ using namespace Eigen;

 typedef float Scalar;

-__attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
-__attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
-__attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
+__attribute__((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
+__attribute__((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
+__attribute__((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);

-int main(int argc, char* argv[])
-{
-    int size = SIZE * 8;
-    int size2 = size * size;
-    Scalar* a = internal::aligned_new<Scalar>(size2);
-    Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
-    Scalar* c = internal::aligned_new<Scalar>(size2); 
-    
-    for (int i=0; i<size; ++i)
-    {
-        a[i] = b[i] = c[i] = 0;
-    }
-    
-    BenchTimer timer;
-    
-    timer.reset();
-    for (int k=0; k<10; ++k)
-    {
+int main(int argc, char* argv[]) {
+  int size = SIZE * 8;
+  int size2 = size * size;
+  Scalar* a = internal::aligned_new<Scalar>(size2);
+  Scalar* b = internal::aligned_new<Scalar>(size2 + 4) + 1;
+  Scalar* c = internal::aligned_new<Scalar>(size2);
+
+  for (int i = 0; i < size; ++i) {
+    a[i] = b[i] = c[i] = 0;
+  }
+
+  BenchTimer timer;
+
+  timer.reset();
+  for (int k = 0; k < 10; ++k) {
+    timer.start();
+    benchVec(a, b, c, size2);
+    timer.stop();
+  }
+  std::cout << timer.value() << "s  " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
+            << " GFlops\n";
+  return 0;
+  for (int innersize = size; innersize > 2; --innersize) {
+    if (size2 % innersize == 0) {
+      int outersize = size2 / innersize;
+      MatrixXf ma = Map<MatrixXf>(a, innersize, outersize);
+      MatrixXf mb = Map<MatrixXf>(b, innersize, outersize);
+      MatrixXf mc = Map<MatrixXf>(c, innersize, outersize);
+      timer.reset();
+      for (int k = 0; k < 3; ++k) {
        timer.start();
-        benchVec(a, b, c, size2);
+        benchVec(ma, mb, mc);
        timer.stop();
+      }
+      std::cout << innersize << " x " << outersize << "  " << timer.value() << "s   "
+                << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) << " GFlops\n";
    }
-    std::cout << timer.value() << "s  " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
-    return 0;
-    for (int innersize = size; innersize>2 ; --innersize)
-    {
-        if (size2%innersize==0)
-        {
-            int outersize = size2/innersize;
-            MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
-            MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
-            MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
-            timer.reset();
-            for (int k=0; k<3; ++k)
-            {
-                timer.start();
-                benchVec(ma, mb, mc);
-                timer.stop();
-            }
-            std::cout << innersize << " x " << outersize << "  " << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
-        }
+  }
+
+  VectorXf va = Map<VectorXf>(a, size2);
+  VectorXf vb = Map<VectorXf>(b, size2);
+  VectorXf vc = Map<VectorXf>(c, size2);
+  timer.reset();
+  for (int k = 0; k < 3; ++k) {
+    timer.start();
+    benchVec(va, vb, vc);
+    timer.stop();
+  }
+  std::cout << timer.value() << "s   " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
+            << " GFlops\n";
+
+  return 0;
+}
+
+void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) {
+  for (int k = 0; k < REPEAT; ++k) a = a + b;
+}
+
+void benchVec(VectorXf& a, VectorXf& b, VectorXf& c) {
+  for (int k = 0; k < REPEAT; ++k) a = a + b;
+}
+
+void benchVec(Scalar* a, Scalar* b, Scalar* c, int size) {
+  typedef internal::packet_traits<Scalar>::type PacketScalar;
+  const int PacketSize = internal::packet_traits<Scalar>::size;
+  PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
+  for (int k = 0; k < REPEAT; ++k)
+    for (int i = 0; i < size; i += PacketSize * 8) {
+      //             a0 = internal::pload(&a[i]);
+      //             b0 = internal::pload(&b[i]);
+      //             a1 = internal::pload(&a[i+1*PacketSize]);
+      //             b1 = internal::pload(&b[i+1*PacketSize]);
+      //             a2 = internal::pload(&a[i+2*PacketSize]);
+      //             b2 = internal::pload(&b[i+2*PacketSize]);
+      //             a3 = internal::pload(&a[i+3*PacketSize]);
+      //             b3 = internal::pload(&b[i+3*PacketSize]);
+      //             internal::pstore(&a[i], internal::padd(a0, b0));
+      //             a0 = internal::pload(&a[i+4*PacketSize]);
+      //             b0 = internal::pload(&b[i+4*PacketSize]);
+      //
+      //             internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
+      //             a1 = internal::pload(&a[i+5*PacketSize]);
+      //             b1 = internal::pload(&b[i+5*PacketSize]);
+      //
+      //             internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
+      //             a2 = internal::pload(&a[i+6*PacketSize]);
+      //             b2 = internal::pload(&b[i+6*PacketSize]);
+      //
+      //             internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
+      //             a3 = internal::pload(&a[i+7*PacketSize]);
+      //             b3 = internal::pload(&b[i+7*PacketSize]);
+      //
+      //             internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
+      //             internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
+      //             internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
+      //             internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
+
+      internal::pstore(&a[i + 2 * PacketSize], internal::padd(internal::ploadu(&a[i + 2 * PacketSize]),
+                                                              internal::ploadu(&b[i + 2 * PacketSize])));
+      internal::pstore(&a[i + 3 * PacketSize], internal::padd(internal::ploadu(&a[i + 3 * PacketSize]),
+                                                              internal::ploadu(&b[i + 3 * PacketSize])));
+      internal::pstore(&a[i + 4 * PacketSize], internal::padd(internal::ploadu(&a[i + 4 * PacketSize]),
+                                                              internal::ploadu(&b[i + 4 * PacketSize])));
+      internal::pstore(&a[i + 5 * PacketSize], internal::padd(internal::ploadu(&a[i + 5 * PacketSize]),
+                                                              internal::ploadu(&b[i + 5 * PacketSize])));
+      internal::pstore(&a[i + 6 * PacketSize], internal::padd(internal::ploadu(&a[i + 6 * PacketSize]),
+                                                              internal::ploadu(&b[i + 6 * PacketSize])));
+      internal::pstore(&a[i + 7 * PacketSize], internal::padd(internal::ploadu(&a[i + 7 * PacketSize]),
+                                                              internal::ploadu(&b[i + 7 * PacketSize])));
    }
-    
-    VectorXf va = Map<VectorXf>(a, size2);
-    VectorXf vb = Map<VectorXf>(b, size2);
-    VectorXf vc = Map<VectorXf>(c, size2);
-    timer.reset();
-    for (int k=0; k<3; ++k)
-    {
-        timer.start();
-        benchVec(va, vb, vc);
-        timer.stop();
-    }
-    std::cout << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
-
-    return 0;
-}
-
-void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
-{
-    for (int k=0; k<REPEAT; ++k)
-        a = a + b;
-}
-
-void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
-{
-    for (int k=0; k<REPEAT; ++k)
-        a = a + b;
-}
-
-void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
-{
-    typedef internal::packet_traits<Scalar>::type PacketScalar;
-    const int PacketSize = internal::packet_traits<Scalar>::size;
-    PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
-    for (int k=0; k<REPEAT; ++k)
-        for (int i=0; i<size; i+=PacketSize*8)
-        {
-//             a0 = internal::pload(&a[i]);
-//             b0 = internal::pload(&b[i]);
-//             a1 = internal::pload(&a[i+1*PacketSize]);
-//             b1 = internal::pload(&b[i+1*PacketSize]);
-//             a2 = internal::pload(&a[i+2*PacketSize]);
-//             b2 = internal::pload(&b[i+2*PacketSize]);
-//             a3 = internal::pload(&a[i+3*PacketSize]);
-//             b3 = internal::pload(&b[i+3*PacketSize]);
-//             internal::pstore(&a[i], internal::padd(a0, b0));
-//             a0 = internal::pload(&a[i+4*PacketSize]);
-//             b0 = internal::pload(&b[i+4*PacketSize]);
-//             
-//             internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
-//             a1 = internal::pload(&a[i+5*PacketSize]);
-//             b1 = internal::pload(&b[i+5*PacketSize]);
-//             
-//             internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
-//             a2 = internal::pload(&a[i+6*PacketSize]);
-//             b2 = internal::pload(&b[i+6*PacketSize]);
-//             
-//             internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
-//             a3 = internal::pload(&a[i+7*PacketSize]);
-//             b3 = internal::pload(&b[i+7*PacketSize]);
-//             
-//             internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
-//             internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
-//             internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
-//             internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
-            
-            internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize])));
-            internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize])));
-            internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize])));
-            internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize])));
-            internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize])));
-            internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize])));
-        }
 }
--- a/bench/bench_gemm.cpp
+++ b/bench/bench_gemm.cpp
@ -3,7 +3,7 @@
 // icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp  && OMP_NUM_THREADS=2  ./a.out

 // Compilation options:
-// 
+//
 // -DSCALAR=std::complex<double>
 // -DSCALARA=double or -DSCALARB=double
 // -DHAVE_BLAS
@ -14,7 +14,6 @@
 #include <bench/BenchTimer.h>
 #include <Eigen/Core>

-
 using namespace std;
 using namespace Eigen;

@ -45,15 +44,15 @@ const int opt_B = ColMajor;

 typedef SCALAR Scalar;
 typedef NumTraits<Scalar>::Real RealScalar;
-typedef Matrix<SCALARA,Dynamic,Dynamic,opt_A> A;
-typedef Matrix<SCALARB,Dynamic,Dynamic,opt_B> B;
-typedef Matrix<Scalar,Dynamic,Dynamic> C;
-typedef Matrix<RealScalar,Dynamic,Dynamic> M;
+typedef Matrix<SCALARA, Dynamic, Dynamic, opt_A> A;
+typedef Matrix<SCALARB, Dynamic, Dynamic, opt_B> B;
+typedef Matrix<Scalar, Dynamic, Dynamic> C;
+typedef Matrix<RealScalar, Dynamic, Dynamic> M;

 #ifdef HAVE_BLAS

 extern "C" {
-  #include <Eigen/src/misc/blas.h>
+#include <Eigen/src/misc/blas.h>
 }

 static float fone = 1;
@ -65,7 +64,7 @@ static std::complex<float> cfzero = 0;
 static std::complex<double> cdone = 1;
 static std::complex<double> cdzero = 0;
 static char notrans = 'N';
-static char trans = 'T';  
+static char trans = 'T';
 static char nonunit = 'N';
 static char lower = 'L';
 static char right = 'R';
@ -83,60 +82,61 @@ const char transB = trans;
 const char transB = notrans;
 #endif

-template<typename A,typename B>
-void blas_gemm(const A& a, const B& b, MatrixXf& c)
-{
-  int M = c.rows(); int N = c.cols(); int K = a.cols();
-  int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
+template <typename A, typename B>
+void blas_gemm(const A& a, const B& b, MatrixXf& c) {
+  int M = c.rows();
+  int N = c.cols();
+  int K = a.cols();
+  int lda = a.outerStride();
+  int ldb = b.outerStride();
+  int ldc = c.rows();

-  sgemm_(&transA,&transB,&M,&N,&K,&fone,
-         const_cast<float*>(a.data()),&lda,
-         const_cast<float*>(b.data()),&ldb,&fone,
-         c.data(),&ldc);
+  sgemm_(&transA, &transB, &M, &N, &K, &fone, const_cast<float*>(a.data()), &lda, const_cast<float*>(b.data()), &ldb,
+         &fone, c.data(), &ldc);
 }

-template<typename A,typename B>
-void blas_gemm(const A& a, const B& b, MatrixXd& c)
-{
-  int M = c.rows(); int N = c.cols(); int K = a.cols();
-  int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
+template <typename A, typename B>
+void blas_gemm(const A& a, const B& b, MatrixXd& c) {
+  int M = c.rows();
+  int N = c.cols();
+  int K = a.cols();
+  int lda = a.outerStride();
+  int ldb = b.outerStride();
+  int ldc = c.rows();

-  dgemm_(&transA,&transB,&M,&N,&K,&done,
-         const_cast<double*>(a.data()),&lda,
-         const_cast<double*>(b.data()),&ldb,&done,
-         c.data(),&ldc);
+  dgemm_(&transA, &transB, &M, &N, &K, &done, const_cast<double*>(a.data()), &lda, const_cast<double*>(b.data()), &ldb,
+         &done, c.data(), &ldc);
 }

-template<typename A,typename B>
-void blas_gemm(const A& a, const B& b, MatrixXcf& c)
-{
-  int M = c.rows(); int N = c.cols(); int K = a.cols();
-  int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
+template <typename A, typename B>
+void blas_gemm(const A& a, const B& b, MatrixXcf& c) {
+  int M = c.rows();
+  int N = c.cols();
+  int K = a.cols();
+  int lda = a.outerStride();
+  int ldb = b.outerStride();
+  int ldc = c.rows();

-  cgemm_(&transA,&transB,&M,&N,&K,(float*)&cfone,
-         const_cast<float*>((const float*)a.data()),&lda,
-         const_cast<float*>((const float*)b.data()),&ldb,(float*)&cfone,
-         (float*)c.data(),&ldc);
+  cgemm_(&transA, &transB, &M, &N, &K, (float*)&cfone, const_cast<float*>((const float*)a.data()), &lda,
+         const_cast<float*>((const float*)b.data()), &ldb, (float*)&cfone, (float*)c.data(), &ldc);
 }

-template<typename A,typename B>
-void blas_gemm(const A& a, const B& b, MatrixXcd& c)
-{
-  int M = c.rows(); int N = c.cols(); int K = a.cols();
-  int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
+template <typename A, typename B>
+void blas_gemm(const A& a, const B& b, MatrixXcd& c) {
+  int M = c.rows();
+  int N = c.cols();
+  int K = a.cols();
+  int lda = a.outerStride();
+  int ldb = b.outerStride();
+  int ldc = c.rows();

-  zgemm_(&transA,&transB,&M,&N,&K,(double*)&cdone,
-         const_cast<double*>((const double*)a.data()),&lda,
-         const_cast<double*>((const double*)b.data()),&ldb,(double*)&cdone,
-         (double*)c.data(),&ldc);
+  zgemm_(&transA, &transB, &M, &N, &K, (double*)&cdone, const_cast<double*>((const double*)a.data()), &lda,
+         const_cast<double*>((const double*)b.data()), &ldb, (double*)&cdone, (double*)c.data(), &ldc);
 }

-
-
 #endif

-void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci)
-{
+void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci) {
  cr.noalias() += ar * br;
  cr.noalias() -= ai * bi;
  ci.noalias() += ar * bi;
@ -144,33 +144,27 @@ void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr,
  // [cr ci] += [ar ai] * br + [-ai ar] * bi
 }

-void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci)
-{
+void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci) {
  cr.noalias() += a * br;
  ci.noalias() += a * bi;
 }

-void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci)
-{
+void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci) {
  cr.noalias() += ar * b;
  ci.noalias() += ai * b;
 }

-
-
-template<typename A, typename B, typename C>
-EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c)
-{
+template <typename A, typename B, typename C>
+EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) {
  c.noalias() += a * b;
 }

-int main(int argc, char ** argv)
-{
+int main(int argc, char** argv) {
  std::ptrdiff_t l1 = internal::queryL1CacheSize();
  std::ptrdiff_t l2 = internal::queryTopLevelCacheSize();
-  std::cout << "L1 cache size     = " << (l1>0 ? l1/1024 : -1) << " KB\n";
-  std::cout << "L2/L3 cache size  = " << (l2>0 ? l2/1024 : -1) << " KB\n";
-  typedef internal::gebp_traits<Scalar,Scalar> Traits;
+  std::cout << "L1 cache size     = " << (l1 > 0 ? l1 / 1024 : -1) << " KB\n";
+  std::cout << "L2/L3 cache size  = " << (l2 > 0 ? l2 / 1024 : -1) << " KB\n";
+  typedef internal::gebp_traits<Scalar, Scalar> Traits;
  std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n";

  int rep = 1;    // number of repetitions per try
@ -180,196 +174,220 @@ int main(int argc, char ** argv)
  int m = s;
  int n = s;
  int p = s;
-  int cache_size1=-1, cache_size2=l2, cache_size3 = 0;
+  int cache_size1 = -1, cache_size2 = l2, cache_size3 = 0;

  bool need_help = false;
-  for (int i=1; i<argc;)
-  {
-    if(argv[i][0]=='-')
-    {
-      if(argv[i][1]=='s')
-      {
+  for (int i = 1; i < argc;) {
+    if (argv[i][0] == '-') {
+      if (argv[i][1] == 's') {
        ++i;
        s = atoi(argv[i++]);
        m = n = p = s;
-        if(argv[i][0]!='-')
-        {
+        if (argv[i][0] != '-') {
          n = atoi(argv[i++]);
          p = atoi(argv[i++]);
        }
-      }
-      else if(argv[i][1]=='c')
-      {
+      } else if (argv[i][1] == 'c') {
        ++i;
        cache_size1 = atoi(argv[i++]);
-        if(argv[i][0]!='-')
-        {
+        if (argv[i][0] != '-') {
          cache_size2 = atoi(argv[i++]);
-          if(argv[i][0]!='-')
-            cache_size3 = atoi(argv[i++]);
+          if (argv[i][0] != '-') cache_size3 = atoi(argv[i++]);
        }
-      }
-      else if(argv[i][1]=='t')
-      {
+      } else if (argv[i][1] == 't') {
        tries = atoi(argv[++i]);
        ++i;
-      }
-      else if(argv[i][1]=='p')
-      {
+      } else if (argv[i][1] == 'p') {
        ++i;
        rep = atoi(argv[i++]);
      }
-    }
-    else
-    {
+    } else {
      need_help = true;
      break;
    }
  }

-  if(need_help)
-  {
+  if (need_help) {
    std::cout << argv[0] << " -s <matrix sizes> -c <cache sizes> -t <nb tries> -p <nb repeats>\n";
    std::cout << "   <matrix sizes> : size\n";
    std::cout << "   <matrix sizes> : rows columns depth\n";
    return 1;
  }

-#if EIGEN_VERSION_AT_LEAST(3,2,90)
-  if(cache_size1>0)
-    setCpuCacheSizes(cache_size1,cache_size2,cache_size3);
+#if EIGEN_VERSION_AT_LEAST(3, 2, 90)
+  if (cache_size1 > 0) setCpuCacheSizes(cache_size1, cache_size2, cache_size3);
 #endif
-  
-  A a(m,p); a.setRandom();
-  B b(p,n); b.setRandom();
-  C c(m,n); c.setOnes();
+
+  A a(m, p);
+  a.setRandom();
+  B b(p, n);
+  b.setRandom();
+  C c(m, n);
+  c.setOnes();
  C rc = c;

  std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n";
  std::ptrdiff_t mc(m), nc(n), kc(p);
-  internal::computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
+  internal::computeProductBlockingSizes<Scalar, Scalar>(kc, mc, nc);
  std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << " x " << nc << "\n";

  C r = c;

-  // check the parallel product is correct
-  #if defined EIGEN_HAS_OPENMP
+// check the parallel product is correct
+#if defined EIGEN_HAS_OPENMP
  Eigen::initParallel();
  int procs = omp_get_max_threads();
-  if(procs>1)
-  {
-    #ifdef HAVE_BLAS
-    blas_gemm(a,b,r);
-    #else
+  if (procs > 1) {
+#ifdef HAVE_BLAS
+    blas_gemm(a, b, r);
+#else
    omp_set_num_threads(1);
    r.noalias() += a * b;
    omp_set_num_threads(procs);
-    #endif
+#endif
    c.noalias() += a * b;
-    if(!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n";
+    if (!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n";
  }
-  #elif defined HAVE_BLAS
-    blas_gemm(a,b,r);
-    c.noalias() += a * b;
-    if(!r.isApprox(c)) {
-      std::cout << (r  - c).norm()/r.norm() << "\n";
+#elif defined HAVE_BLAS
+  blas_gemm(a, b, r);
+  c.noalias() += a * b;
+  if (!r.isApprox(c)) {
+    std::cout << (r - c).norm() / r.norm() << "\n";
+    std::cerr << "Warning, your product is crap!\n\n";
+  }
+#else
+  if (1. * m * n * p < 2000. * 2000 * 2000) {
+    gemm(a, b, c);
+    r.noalias() += a.cast<Scalar>().lazyProduct(b.cast<Scalar>());
+    if (!r.isApprox(c)) {
+      std::cout << (r - c).norm() / r.norm() << "\n";
      std::cerr << "Warning, your product is crap!\n\n";
    }
-  #else
-    if(1.*m*n*p<2000.*2000*2000)
-    {
-      gemm(a,b,c);
-      r.noalias() += a.cast<Scalar>() .lazyProduct( b.cast<Scalar>() );
-      if(!r.isApprox(c)) {
-        std::cout << (r  - c).norm()/r.norm() << "\n";
-        std::cerr << "Warning, your product is crap!\n\n";
-      }
-    }
-  #endif
+  }
+#endif

-  #ifdef HAVE_BLAS
+#ifdef HAVE_BLAS
  BenchTimer tblas;
  c = rc;
-  BENCH(tblas, tries, rep, blas_gemm(a,b,c));
-  std::cout << "blas  cpu         " << tblas.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tblas.total(CPU_TIMER)  << "s)\n";
-  std::cout << "blas  real        " << tblas.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n";
-  #endif
+  BENCH(tblas, tries, rep, blas_gemm(a, b, c));
+  std::cout << "blas  cpu         " << tblas.best(CPU_TIMER) / rep << "s  \t"
+            << (double(m) * n * p * rep * 2 / tblas.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER)
+            << "s)\n";
+  std::cout << "blas  real        " << tblas.best(REAL_TIMER) / rep << "s  \t"
+            << (double(m) * n * p * rep * 2 / tblas.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER)
+            << "s)\n";
+#endif

  // warm start
-  if(b.norm()+a.norm()==123.554) std::cout << "\n";
+  if (b.norm() + a.norm() == 123.554) std::cout << "\n";

  BenchTimer tmt;
  c = rc;
-  BENCH(tmt, tries, rep, gemm(a,b,c));
-  std::cout << "eigen cpu         " << tmt.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmt.total(CPU_TIMER)  << "s)\n";
-  std::cout << "eigen real        " << tmt.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
+  BENCH(tmt, tries, rep, gemm(a, b, c));
+  std::cout << "eigen cpu         " << tmt.best(CPU_TIMER) / rep << "s  \t"
+            << (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER)
+            << "s)\n";
+  std::cout << "eigen real        " << tmt.best(REAL_TIMER) / rep << "s  \t"
+            << (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER)
+            << "s)\n";

-  #ifdef EIGEN_HAS_OPENMP
-  if(procs>1)
-  {
+#ifdef EIGEN_HAS_OPENMP
+  if (procs > 1) {
    BenchTimer tmono;
    omp_set_num_threads(1);
    Eigen::setNbThreads(1);
    c = rc;
-    BENCH(tmono, tries, rep, gemm(a,b,c));
-    std::cout << "eigen mono cpu    " << tmono.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmono.total(CPU_TIMER)  << "s)\n";
-    std::cout << "eigen mono real   " << tmono.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n";
-    std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)  << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n";
+    BENCH(tmono, tries, rep, gemm(a, b, c));
+    std::cout << "eigen mono cpu    " << tmono.best(CPU_TIMER) / rep << "s  \t"
+              << (double(m) * n * p * rep * 2 / tmono.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER)
+              << "s)\n";
+    std::cout << "eigen mono real   " << tmono.best(REAL_TIMER) / rep << "s  \t"
+              << (double(m) * n * p * rep * 2 / tmono.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t("
+              << tmono.total(REAL_TIMER) << "s)\n";
+    std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => "
+              << (100.0 * tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)) / procs << "%\n";
  }
-  #endif
-  
-  if(1.*m*n*p<30*30*30)
-  {
+#endif
+
+  if (1. * m * n * p < 30 * 30 * 30) {
    BenchTimer tmt;
    c = rc;
-    BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b));
-    std::cout << "lazy cpu         " << tmt.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmt.total(CPU_TIMER)  << "s)\n";
-    std::cout << "lazy real        " << tmt.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
+    BENCH(tmt, tries, rep, c.noalias() += a.lazyProduct(b));
+    std::cout << "lazy cpu         " << tmt.best(CPU_TIMER) / rep << "s  \t"
+              << (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER)
+              << "s)\n";
+    std::cout << "lazy real        " << tmt.best(REAL_TIMER) / rep << "s  \t"
+              << (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER)
+              << "s)\n";
  }
-  
-  #ifdef DECOUPLED
-  if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
-  {
-    M ar(m,p); ar.setRandom();
-    M ai(m,p); ai.setRandom();
-    M br(p,n); br.setRandom();
-    M bi(p,n); bi.setRandom();
-    M cr(m,n); cr.setRandom();
-    M ci(m,n); ci.setRandom();
-    
+
+#ifdef DECOUPLED
+  if ((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) {
+    M ar(m, p);
+    ar.setRandom();
+    M ai(m, p);
+    ai.setRandom();
+    M br(p, n);
+    br.setRandom();
+    M bi(p, n);
+    bi.setRandom();
+    M cr(m, n);
+    cr.setRandom();
+    M ci(m, n);
+    ci.setRandom();
+
    BenchTimer t;
-    BENCH(t, tries, rep, matlab_cplx_cplx(ar,ai,br,bi,cr,ci));
-    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
-    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
+    BENCH(t, tries, rep, matlab_cplx_cplx(ar, ai, br, bi, cr, ci));
+    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER) / rep << "s  \t"
+              << (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
+              << "s)\n";
+    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER) / rep << "s  \t"
+              << (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
+              << "s)\n";
  }
-  if((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
-  {
-    M a(m,p);  a.setRandom();
-    M br(p,n); br.setRandom();
-    M bi(p,n); bi.setRandom();
-    M cr(m,n); cr.setRandom();
-    M ci(m,n); ci.setRandom();
-    
+  if ((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) {
+    M a(m, p);
+    a.setRandom();
+    M br(p, n);
+    br.setRandom();
+    M bi(p, n);
+    bi.setRandom();
+    M cr(m, n);
+    cr.setRandom();
+    M ci(m, n);
+    ci.setRandom();
+
    BenchTimer t;
-    BENCH(t, tries, rep, matlab_real_cplx(a,br,bi,cr,ci));
-    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
-    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
+    BENCH(t, tries, rep, matlab_real_cplx(a, br, bi, cr, ci));
+    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER) / rep << "s  \t"
+              << (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
+              << "s)\n";
+    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER) / rep << "s  \t"
+              << (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
+              << "s)\n";
  }
-  if((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex))
-  {
-    M ar(m,p); ar.setRandom();
-    M ai(m,p); ai.setRandom();
-    M b(p,n);  b.setRandom();
-    M cr(m,n); cr.setRandom();
-    M ci(m,n); ci.setRandom();
-    
+  if ((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex)) {
+    M ar(m, p);
+    ar.setRandom();
+    M ai(m, p);
+    ai.setRandom();
+    M b(p, n);
+    b.setRandom();
+    M cr(m, n);
+    cr.setRandom();
+    M ci(m, n);
+    ci.setRandom();
+
    BenchTimer t;
-    BENCH(t, tries, rep, matlab_cplx_real(ar,ai,b,cr,ci));
-    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
-    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
+    BENCH(t, tries, rep, matlab_cplx_real(ar, ai, b, cr, ci));
+    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER) / rep << "s  \t"
+              << (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
+              << "s)\n";
+    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER) / rep << "s  \t"
+              << (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
+              << "s)\n";
  }
-  #endif
+#endif

  return 0;
 }
-
--- a/bench/bench_move_semantics.cpp
+++ b/bench/bench_move_semantics.cpp
@ -16,23 +16,20 @@
 #include <utility>

 template <typename MatrixType>
-void copy_matrix(MatrixType& m)
-{
+void copy_matrix(MatrixType& m) {
  MatrixType tmp(m);
  m = tmp;
 }

 template <typename MatrixType>
-void move_matrix(MatrixType&& m)
-{
+void move_matrix(MatrixType&& m) {
  MatrixType tmp(std::move(m));
  m = std::move(tmp);
 }

-template<typename Scalar>
-void bench(const std::string& label)
-{
-  using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>,1,10>;
+template <typename Scalar>
+void bench(const std::string& label) {
+  using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>, 1, 10>;
  Eigen::BenchTimer t;

  int tries = 10;
@ -42,16 +39,14 @@ void bench(const std::string& label)
  MatrixType dest;

  BENCH(t, tries, rep, copy_matrix(data));
-  std::cout << label << " copy semantics: " << 1e3*t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
+  std::cout << label << " copy semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl;

  BENCH(t, tries, rep, move_matrix(std::move(data)));
-  std::cout << label << " move semantics: " << 1e3*t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
+  std::cout << label << " move semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
 }

-int main()
-{
+int main() {
  bench<float>("float");
  bench<double>("double");
  return 0;
 }
-
--- a/bench/bench_norm.cpp
+++ b/bench/bench_norm.cpp
@ -5,79 +5,64 @@
 using namespace Eigen;
 using namespace std;

-template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v)
-{
+template <typename T>
+EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) {
  return v.norm();
 }

-template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v)
-{
+template <typename T>
+EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) {
  return v.stableNorm();
 }

-template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v)
-{
+template <typename T>
+EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) {
  return v.hypotNorm();
 }

-template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v)
-{
+template <typename T>
+EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) {
  return v.blueNorm();
 }

-template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v)
-{
+template <typename T>
+EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) {
  typedef typename T::Scalar Scalar;
  int n = v.size();
  Scalar scale = 0;
  Scalar ssq = 1;
-  for (int i=0;i<n;++i)
-  {
+  for (int i = 0; i < n; ++i) {
    Scalar ax = std::abs(v.coeff(i));
-    if (scale >= ax)
-    {
-      ssq += numext::abs2(ax/scale);
-    }
-    else
-    {
-      ssq = Scalar(1) + ssq * numext::abs2(scale/ax);
+    if (scale >= ax) {
+      ssq += numext::abs2(ax / scale);
+    } else {
+      ssq = Scalar(1) + ssq * numext::abs2(scale / ax);
      scale = ax;
    }
  }
  return scale * std::sqrt(ssq);
 }

-template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v)
-{
+template <typename T>
+EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) {
  typedef typename T::Scalar Scalar;
  Scalar s = v.array().abs().maxCoeff();
-  return s*(v/s).norm();
+  return s * (v / s).norm();
 }

-template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v)
-{
+template <typename T>
+EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) {
  return v.stableNorm();
 }

-template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
-{
-  int n =v.size() / 2;
-  for (int i=0;i<n;++i)
-    v(i) = v(2*i)*v(2*i) + v(2*i+1)*v(2*i+1);
-  n = n/2;
-  while (n>0)
-  {
-    for (int i=0;i<n;++i)
-      v(i) = v(2*i) + v(2*i+1);
-    n = n/2;
+template <typename T>
+EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) {
+  int n = v.size() / 2;
+  for (int i = 0; i < n; ++i) v(i) = v(2 * i) * v(2 * i) + v(2 * i + 1) * v(2 * i + 1);
+  n = n / 2;
+  while (n > 0) {
+    for (int i = 0; i < n; ++i) v(i) = v(2 * i) + v(2 * i + 1);
+    n = n / 2;
  }
  return std::sqrt(v(0));
 }
@ -85,61 +70,61 @@ EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
 namespace Eigen {
 namespace internal {
 #ifdef EIGEN_VECTORIZE
-Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); }
-Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }
+Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a, b); }
+Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a, b); }

-Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); }
-Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); }
+Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a, b); }
+Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a, b); }
 #endif
-}
-}
+}  // namespace internal
+}  // namespace Eigen

-template<typename T>
-EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
-{
-  #ifndef EIGEN_VECTORIZE
+template <typename T>
+EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) {
+#ifndef EIGEN_VECTORIZE
  return v.blueNorm();
-  #else
+#else
  typedef typename T::Scalar Scalar;

  static int nmax = 0;
  static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;
  int n;

-  if(nmax <= 0)
-  {
+  if (nmax <= 0) {
    int nbig, ibeta, it, iemin, iemax, iexp;
    Scalar abig, eps;

-    nbig  = NumTraits<int>::highest();          // largest integer
-    ibeta = std::numeric_limits<Scalar>::radix; // NumTraits<Scalar>::Base;                    // base for floating-point numbers
-    it    = NumTraits<Scalar>::digits();        // NumTraits<Scalar>::Mantissa;                // number of base-beta digits in mantissa
+    nbig = NumTraits<int>::highest();            // largest integer
+    ibeta = std::numeric_limits<Scalar>::radix;  // NumTraits<Scalar>::Base;                    // base for
+                                                 // floating-point numbers
+    it = NumTraits<Scalar>::digits();  // NumTraits<Scalar>::Mantissa;                // number of base-beta digits in
+                                       // mantissa
    iemin = NumTraits<Scalar>::min_exponent();  // minimum exponent
    iemax = NumTraits<Scalar>::max_exponent();  // maximum exponent
-    rbig  = NumTraits<Scalar>::highest();       // largest floating-point number
+    rbig = NumTraits<Scalar>::highest();        // largest floating-point number

    // Check the basic machine-dependent constants.
-    if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5)
-      || (it<=4 && ibeta <= 3 ) || it<2)
-    {
+    if (iemin > 1 - 2 * it || 1 + it > iemax || (it == 2 && ibeta < 5) || (it <= 4 && ibeta <= 3) || it < 2) {
      eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
    }
-    iexp  = -((1-iemin)/2);
-    b1    = std::pow(ibeta, iexp);  // lower boundary of midrange
-    iexp  = (iemax + 1 - it)/2;
-    b2    = std::pow(ibeta,iexp);   // upper boundary of midrange
+    iexp = -((1 - iemin) / 2);
+    b1 = std::pow(ibeta, iexp);  // lower boundary of midrange
+    iexp = (iemax + 1 - it) / 2;
+    b2 = std::pow(ibeta, iexp);  // upper boundary of midrange

-    iexp  = (2-iemin)/2;
-    s1m   = std::pow(ibeta,iexp);   // scaling factor for lower range
-    iexp  = - ((iemax+it)/2);
-    s2m   = std::pow(ibeta,iexp);   // scaling factor for upper range
+    iexp = (2 - iemin) / 2;
+    s1m = std::pow(ibeta, iexp);  // scaling factor for lower range
+    iexp = -((iemax + it) / 2);
+    s2m = std::pow(ibeta, iexp);  // scaling factor for upper range

-    overfl  = rbig*s2m;          // overflow boundary for abig
-    eps     = std::pow(ibeta, 1-it);
-    relerr  = std::sqrt(eps);      // tolerance for neglecting asml
-    abig    = 1.0/eps - 1.0;
-    if (Scalar(nbig)>abig)  nmax = abig;  // largest safe n
-    else                    nmax = nbig;
+    overfl = rbig * s2m;  // overflow boundary for abig
+    eps = std::pow(ibeta, 1 - it);
+    relerr = std::sqrt(eps);  // tolerance for neglecting asml
+    abig = 1.0 / eps - 1.0;
+    if (Scalar(nbig) > abig)
+      nmax = abig;  // largest safe n
+    else
+      nmax = nbig;
  }

  typedef typename internal::packet_traits<Scalar>::type Packet;
@ -149,108 +134,103 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
  Packet pabig = internal::pset1<Packet>(Scalar(0));
  Packet ps2m = internal::pset1<Packet>(s2m);
  Packet ps1m = internal::pset1<Packet>(s1m);
-  Packet pb2  = internal::pset1<Packet>(b2);
-  Packet pb1  = internal::pset1<Packet>(b1);
-  for(int j=0; j<v.size(); j+=ps)
-  {
+  Packet pb2 = internal::pset1<Packet>(b2);
+  Packet pb1 = internal::pset1<Packet>(b1);
+  for (int j = 0; j < v.size(); j += ps) {
    Packet ax = internal::pabs(v.template packet<Aligned>(j));
-    Packet ax_s2m = internal::pmul(ax,ps2m);
-    Packet ax_s1m = internal::pmul(ax,ps1m);
-    Packet maskBig = internal::plt(pb2,ax);
-    Packet maskSml = internal::plt(ax,pb1);
+    Packet ax_s2m = internal::pmul(ax, ps2m);
+    Packet ax_s1m = internal::pmul(ax, ps1m);
+    Packet maskBig = internal::plt(pb2, ax);
+    Packet maskSml = internal::plt(ax, pb1);

-//     Packet maskMed = internal::pand(maskSml,maskBig);
-//     Packet scale = internal::pset1(Scalar(0));
-//     scale = internal::por(scale, internal::pand(maskBig,ps2m));
-//     scale = internal::por(scale, internal::pand(maskSml,ps1m));
-//     scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
-//     ax = internal::pmul(ax,scale);
-//     ax = internal::pmul(ax,ax);
-//     pabig = internal::padd(pabig, internal::pand(maskBig, ax));
-//     pasml = internal::padd(pasml, internal::pand(maskSml, ax));
-//     pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
+    //     Packet maskMed = internal::pand(maskSml,maskBig);
+    //     Packet scale = internal::pset1(Scalar(0));
+    //     scale = internal::por(scale, internal::pand(maskBig,ps2m));
+    //     scale = internal::por(scale, internal::pand(maskSml,ps1m));
+    //     scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
+    //     ax = internal::pmul(ax,scale);
+    //     ax = internal::pmul(ax,ax);
+    //     pabig = internal::padd(pabig, internal::pand(maskBig, ax));
+    //     pasml = internal::padd(pasml, internal::pand(maskSml, ax));
+    //     pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));

-
-    pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m)));
-    pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m)));
-    pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::pand(maskSml,maskBig)));
+    pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m, ax_s2m)));
+    pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m, ax_s1m)));
+    pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax, ax), internal::pand(maskSml, maskBig)));
  }
  Scalar abig = internal::predux(pabig);
  Scalar asml = internal::predux(pasml);
  Scalar amed = internal::predux(pamed);
-  if(abig > Scalar(0))
-  {
+  if (abig > Scalar(0)) {
    abig = std::sqrt(abig);
-    if(abig > overfl)
-    {
+    if (abig > overfl) {
      eigen_assert(false && "overflow");
      return rbig;
    }
-    if(amed > Scalar(0))
-    {
-      abig = abig/s2m;
+    if (amed > Scalar(0)) {
+      abig = abig / s2m;
      amed = std::sqrt(amed);
-    }
-    else
-    {
-      return abig/s2m;
+    } else {
+      return abig / s2m;
    }

-  }
-  else if(asml > Scalar(0))
-  {
-    if (amed > Scalar(0))
-    {
+  } else if (asml > Scalar(0)) {
+    if (amed > Scalar(0)) {
      abig = std::sqrt(amed);
      amed = std::sqrt(asml) / s1m;
+    } else {
+      return std::sqrt(asml) / s1m;
    }
-    else
-    {
-      return std::sqrt(asml)/s1m;
-    }
-  }
-  else
-  {
+  } else {
    return std::sqrt(amed);
  }
  asml = std::min(abig, amed);
  abig = std::max(abig, amed);
-  if(asml <= abig*relerr)
+  if (asml <= abig * relerr)
    return abig;
  else
-    return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig));
-  #endif
+    return abig * std::sqrt(Scalar(1) + numext::abs2(asml / abig));
+#endif
 }

-#define BENCH_PERF(NRM) { \
-  float af = 0; double ad = 0; std::complex<float> ac = 0; \
-  Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\
-  for (int k=0; k<tries; ++k) { \
-    tf.start(); \
-    for (int i=0; i<iters; ++i) { af += NRM(vf); } \
-    tf.stop(); \
-  } \
-  for (int k=0; k<tries; ++k) { \
-    td.start(); \
-    for (int i=0; i<iters; ++i) { ad += NRM(vd); } \
-    td.stop(); \
-  } \
-  /*for (int k=0; k<std::max(1,tries/3); ++k) { \
-    tcf.start(); \
-    for (int i=0; i<iters; ++i) { ac += NRM(vcf); } \
-    tcf.stop(); \
-  } */\
-  std::cout << #NRM << "\t" << tf.value() << "   " << td.value() <<  "    " << tcf.value() << "\n"; \
-}
+#define BENCH_PERF(NRM)                                                                              \
+  {                                                                                                  \
+    float af = 0;                                                                                    \
+    double ad = 0;                                                                                   \
+    std::complex<float> ac = 0;                                                                      \
+    Eigen::BenchTimer tf, td, tcf;                                                                   \
+    tf.reset();                                                                                      \
+    td.reset();                                                                                      \
+    tcf.reset();                                                                                     \
+    for (int k = 0; k < tries; ++k) {                                                                \
+      tf.start();                                                                                    \
+      for (int i = 0; i < iters; ++i) {                                                              \
+        af += NRM(vf);                                                                               \
+      }                                                                                              \
+      tf.stop();                                                                                     \
+    }                                                                                                \
+    for (int k = 0; k < tries; ++k) {                                                                \
+      td.start();                                                                                    \
+      for (int i = 0; i < iters; ++i) {                                                              \
+        ad += NRM(vd);                                                                               \
+      }                                                                                              \
+      td.stop();                                                                                     \
+    }                                                                                                \
+    /*for (int k=0; k<std::max(1,tries/3); ++k) {                                                    \
+      tcf.start();                                                                                   \
+      for (int i=0; i<iters; ++i) { ac += NRM(vcf); }                                                \
+      tcf.stop();                                                                                    \
+    } */                                                                                             \
+    std::cout << #NRM << "\t" << tf.value() << "   " << td.value() << "    " << tcf.value() << "\n"; \
+  }

-void check_accuracy(double basef, double based, int s)
-{
+void check_accuracy(double basef, double based, int s) {
  double yf = basef * std::abs(internal::random<double>());
  double yd = based * std::abs(internal::random<double>());
  VectorXf vf = VectorXf::Ones(s) * yf;
  VectorXd vd = VectorXd::Ones(s) * yd;

-  std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n";
+  std::cout << "reference\t" << std::sqrt(double(s)) * yf << "\t" << std::sqrt(double(s)) * yd << "\n";
  std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
  std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
  std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
@ -260,34 +240,38 @@ void check_accuracy(double basef, double based, int s)
  std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
 }

-void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
-{
+void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) {
  VectorXf vf(s);
  VectorXd vd(s);
-  for (int i=0; i<s; ++i)
-  {
-    vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
-    vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
+  for (int i = 0; i < s; ++i) {
+    vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0, ef1));
+    vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0, ed1));
  }

-  //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
-  std::cout << "sqsumNorm\t"  << sqsumNorm(vf)  << "\t" << sqsumNorm(vd)  << "\t" << sqsumNorm(vf.cast<long double>()) << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
-  std::cout << "hypotNorm\t"  << hypotNorm(vf)  << "\t" << hypotNorm(vd)  << "\t" << hypotNorm(vf.cast<long double>()) << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
-  std::cout << "blueNorm\t"   << blueNorm(vf)   << "\t" << blueNorm(vd)   << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
-  std::cout << "pblueNorm\t"  << pblueNorm(vf)  << "\t" << pblueNorm(vd)  << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
-  std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>()) << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
-  std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
-//   std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
+  // std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
+  std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>())
+            << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
+  std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>())
+            << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
+  std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t"
+            << blueNorm(vd.cast<long double>()) << "\n";
+  std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>())
+            << "\t" << blueNorm(vd.cast<long double>()) << "\n";
+  std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>())
+            << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
+  std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t"
+            << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
+  //   std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long
+  //   double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
 }

-int main(int argc, char** argv)
-{
+int main(int argc, char** argv) {
  int tries = 10;
  int iters = 100000;
  double y = 1.1345743233455785456788e12 * internal::random<double>();
  VectorXf v = VectorXf::Ones(1024) * y;

-// return 0;
+  // return 0;
  int s = 10000;
  double basef_ok = 1.1345743233455785456788e15;
  double based_ok = 1.1345743233455785456788e95;
@ -310,22 +294,20 @@ int main(int argc, char** argv)
  check_accuracy(basef_over, based_over, s);

  std::cerr << "\nVarying (over):\n";
-  for (int k=0; k<1; ++k)
-  {
-    check_accuracy_var(20,27,190,302,s);
+  for (int k = 0; k < 1; ++k) {
+    check_accuracy_var(20, 27, 190, 302, s);
    std::cout << "\n";
  }

  std::cerr << "\nVarying (under):\n";
-  for (int k=0; k<1; ++k)
-  {
-    check_accuracy_var(-27,20,-302,-190,s);
+  for (int k = 0; k < 1; ++k) {
+    check_accuracy_var(-27, 20, -302, -190, s);
    std::cout << "\n";
  }

  y = 1;
  std::cout.precision(4);
-  int s1 = 1024*1024*32;
+  int s1 = 1024 * 1024 * 32;
  std::cerr << "Performance (out of cache, " << s1 << "):\n";
  {
    int iters = 1;
--- a/bench/bench_reverse.cpp
+++ b/bench/bench_reverse.cpp
@ -15,70 +15,62 @@ using namespace Eigen;
 typedef double Scalar;

 template <typename MatrixType>
-__attribute__ ((noinline)) void bench_reverse(const MatrixType& m)
-{
+__attribute__((noinline)) void bench_reverse(const MatrixType& m) {
  int rows = m.rows();
  int cols = m.cols();
  int size = m.size();

-  int repeats = (REPEAT*1000)/size;
-  MatrixType a = MatrixType::Random(rows,cols);
-  MatrixType b = MatrixType::Random(rows,cols);
+  int repeats = (REPEAT * 1000) / size;
+  MatrixType a = MatrixType::Random(rows, cols);
+  MatrixType b = MatrixType::Random(rows, cols);

  BenchTimer timerB, timerH, timerV;

  Scalar acc = 0;
-  int r = internal::random<int>(0,rows-1);
-  int c = internal::random<int>(0,cols-1);
-  for (int t=0; t<TRIES; ++t)
-  {
+  int r = internal::random<int>(0, rows - 1);
+  int c = internal::random<int>(0, cols - 1);
+  for (int t = 0; t < TRIES; ++t) {
    timerB.start();
-    for (int k=0; k<repeats; ++k)
-    {
+    for (int k = 0; k < repeats; ++k) {
      asm("#begin foo");
      b = a.reverse();
      asm("#end foo");
-      acc += b.coeff(r,c);
+      acc += b.coeff(r, c);
    }
    timerB.stop();
  }

-  if (MatrixType::RowsAtCompileTime==Dynamic)
+  if (MatrixType::RowsAtCompileTime == Dynamic)
    std::cout << "dyn   ";
  else
    std::cout << "fixed ";
-  std::cout << rows << " x " << cols << " \t"
-            << (timerB.value() * REPEAT) / repeats << "s "
-            << "(" << 1e-6 * size*repeats/timerB.value() << " MFLOPS)\t";
+  std::cout << rows << " x " << cols << " \t" << (timerB.value() * REPEAT) / repeats << "s "
+            << "(" << 1e-6 * size * repeats / timerB.value() << " MFLOPS)\t";

  std::cout << "\n";
  // make sure the compiler does not optimize too much
-  if (acc==123)
-    std::cout << acc;
+  if (acc == 123) std::cout << acc;
 }

-int main(int argc, char* argv[])
-{
-  const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,0};
+int main(int argc, char* argv[]) {
+  const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 0};
  std::cout << "size            no sqrt                           standard";
-//   #ifdef BENCH_GSL
-//   std::cout << "       GSL (standard + double + ATLAS)  ";
-//   #endif
+  //   #ifdef BENCH_GSL
+  //   std::cout << "       GSL (standard + double + ATLAS)  ";
+  //   #endif
  std::cout << "\n";
-  for (uint i=0; dynsizes[i]>0; ++i)
-  {
-    bench_reverse(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
-    bench_reverse(Matrix<Scalar,Dynamic,1>(dynsizes[i]*dynsizes[i]));
+  for (uint i = 0; dynsizes[i] > 0; ++i) {
+    bench_reverse(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
+    bench_reverse(Matrix<Scalar, Dynamic, 1>(dynsizes[i] * dynsizes[i]));
  }
-//   bench_reverse(Matrix<Scalar,2,2>());
-//   bench_reverse(Matrix<Scalar,3,3>());
-//   bench_reverse(Matrix<Scalar,4,4>());
-//   bench_reverse(Matrix<Scalar,5,5>());
-//   bench_reverse(Matrix<Scalar,6,6>());
-//   bench_reverse(Matrix<Scalar,7,7>());
-//   bench_reverse(Matrix<Scalar,8,8>());
-//   bench_reverse(Matrix<Scalar,12,12>());
-//   bench_reverse(Matrix<Scalar,16,16>());
+  //   bench_reverse(Matrix<Scalar,2,2>());
+  //   bench_reverse(Matrix<Scalar,3,3>());
+  //   bench_reverse(Matrix<Scalar,4,4>());
+  //   bench_reverse(Matrix<Scalar,5,5>());
+  //   bench_reverse(Matrix<Scalar,6,6>());
+  //   bench_reverse(Matrix<Scalar,7,7>());
+  //   bench_reverse(Matrix<Scalar,8,8>());
+  //   bench_reverse(Matrix<Scalar,12,12>());
+  //   bench_reverse(Matrix<Scalar,16,16>());
  return 0;
 }
-
--- a/bench/bench_sum.cpp
+++ b/bench/bench_sum.cpp
@ -3,15 +3,13 @@
 using namespace Eigen;
 using namespace std;

-int main() 
-{
-  typedef Matrix<SCALAR,Eigen::Dynamic,1> Vec;
+int main() {
+  typedef Matrix<SCALAR, Eigen::Dynamic, 1> Vec;
  Vec v(SIZE);
  v.setZero();
  v[0] = 1;
  v[1] = 2;
-  for(int i = 0; i < 1000000; i++)
-  {
+  for (int i = 0; i < 1000000; i++) {
    v.coeffRef(0) += v.sum() * SCALAR(1e-20);
  }
  cout << v.sum() << endl;
--- a/bench/benchmark-blocking-sizes.cpp
+++ b/bench/benchmark-blocking-sizes.cpp
@ -59,14 +59,12 @@ static_assert(maxsize > minsize, "maxsize must be larger than minsize");
 static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)");

 // just a helper to store a triple of K,M,N sizes for matrix product
-struct size_triple_t
-{
+struct size_triple_t {
  size_t k, m, n;
  size_triple_t() : k(0), m(0), n(0) {}
  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
  size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}
-  size_triple_t(uint16_t compact)
-  {
+  size_triple_t(uint16_t compact) {
    k = 1 << ((compact & 0xf00) >> 8);
    m = 1 << ((compact & 0x0f0) >> 4);
    n = 1 << ((compact & 0x00f) >> 0);
@ -82,50 +80,35 @@ uint8_t log2_pot(size_t x) {
 // Convert between size tripes and a compact form fitting in 12 bits
 // where each size, which must be a POT, is encoded as its log2, on 4 bits
 // so the largest representable size is 2^15 == 32k  ... big enough.
-uint16_t compact_size_triple(size_t k, size_t m, size_t n)
-{
+uint16_t compact_size_triple(size_t k, size_t m, size_t n) {
  return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
 }

-uint16_t compact_size_triple(const size_triple_t& t)
-{
-  return compact_size_triple(t.k, t.m, t.n);
-}
+uint16_t compact_size_triple(const size_triple_t& t) { return compact_size_triple(t.k, t.m, t.n); }

 // A single benchmark. Initially only contains benchmark params.
 // Then call run(), which stores the result in the gflops field.
-struct benchmark_t
-{
+struct benchmark_t {
  uint16_t compact_product_size;
  uint16_t compact_block_size;
  bool use_default_block_size;
  float gflops;
-  benchmark_t()
-    : compact_product_size(0)
-    , compact_block_size(0)
-    , use_default_block_size(false)
-    , gflops(0)
-  {
-  }
-  benchmark_t(size_t pk, size_t pm, size_t pn,
-              size_t bk, size_t bm, size_t bn)
-    : compact_product_size(compact_size_triple(pk, pm, pn))
-    , compact_block_size(compact_size_triple(bk, bm, bn))
-    , use_default_block_size(false)
-    , gflops(0)
-  {}
+  benchmark_t() : compact_product_size(0), compact_block_size(0), use_default_block_size(false), gflops(0) {}
+  benchmark_t(size_t pk, size_t pm, size_t pn, size_t bk, size_t bm, size_t bn)
+      : compact_product_size(compact_size_triple(pk, pm, pn)),
+        compact_block_size(compact_size_triple(bk, bm, bn)),
+        use_default_block_size(false),
+        gflops(0) {}
  benchmark_t(size_t pk, size_t pm, size_t pn)
-    : compact_product_size(compact_size_triple(pk, pm, pn))
-    , compact_block_size(0)
-    , use_default_block_size(true)
-    , gflops(0)
-  {}
+      : compact_product_size(compact_size_triple(pk, pm, pn)),
+        compact_block_size(0),
+        use_default_block_size(true),
+        gflops(0) {}

  void run();
 };

-ostream& operator<<(ostream& s, const benchmark_t& b)
-{
+ostream& operator<<(ostream& s, const benchmark_t& b) {
  s << hex << b.compact_product_size << dec;
  if (b.use_default_block_size) {
    size_triple_t t(b.compact_product_size);
@ -141,17 +124,14 @@ ostream& operator<<(ostream& s, const benchmark_t& b)

 // We sort first by increasing benchmark parameters,
 // then by decreasing performance.
-bool operator<(const benchmark_t& b1, const benchmark_t& b2)
-{ 
+bool operator<(const benchmark_t& b1, const benchmark_t& b2) {
  return b1.compact_product_size < b2.compact_product_size ||
-           (b1.compact_product_size == b2.compact_product_size && (
-             (b1.compact_block_size < b2.compact_block_size || (
-               b1.compact_block_size == b2.compact_block_size &&
-                 b1.gflops > b2.gflops))));
+         (b1.compact_product_size == b2.compact_product_size &&
+          ((b1.compact_block_size < b2.compact_block_size ||
+            (b1.compact_block_size == b2.compact_block_size && b1.gflops > b2.gflops))));
 }

-void benchmark_t::run()
-{
+void benchmark_t::run() {
  size_triple_t productsizes(compact_product_size);

  if (use_default_block_size) {
@ -168,26 +148,22 @@ void benchmark_t::run()
  // set up the matrix pool

  const size_t combined_three_matrices_sizes =
-    sizeof(Scalar) *
-      (productsizes.k * productsizes.m +
-       productsizes.k * productsizes.n +
-       productsizes.m * productsizes.n);
+      sizeof(Scalar) *
+      (productsizes.k * productsizes.m + productsizes.k * productsizes.n + productsizes.m * productsizes.n);

  // 64 M is large enough that nobody has a cache bigger than that,
  // while still being small enough that everybody has this much RAM,
  // so conveniently we don't need to special-case platforms here.
  const size_t unlikely_large_cache_size = 64 << 20;

-  const size_t working_set_size =
-    min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
+  const size_t working_set_size = min_working_set_size ? min_working_set_size : unlikely_large_cache_size;

-  const size_t matrix_pool_size =
-    1 + working_set_size / combined_three_matrices_sizes;
+  const size_t matrix_pool_size = 1 + working_set_size / combined_three_matrices_sizes;
+
+  MatrixType* lhs = new MatrixType[matrix_pool_size];
+  MatrixType* rhs = new MatrixType[matrix_pool_size];
+  MatrixType* dst = new MatrixType[matrix_pool_size];

-  MatrixType *lhs = new MatrixType[matrix_pool_size];
-  MatrixType *rhs = new MatrixType[matrix_pool_size];
-  MatrixType *dst = new MatrixType[matrix_pool_size];
-  
  for (size_t i = 0; i < matrix_pool_size; i++) {
    lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k);
    rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n);
@ -200,7 +176,6 @@ void benchmark_t::run()
  float time_per_iter = 0.0f;
  size_t matrix_index = 0;
  while (true) {
-
    double starttime = timer.getCpuTime();
    for (int i = 0; i < iters_at_a_time; i++) {
      dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
@ -228,8 +203,7 @@ void benchmark_t::run()
  gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter;
 }

-void print_cpuinfo()
-{
+void print_cpuinfo() {
 #ifdef __linux__
  cout << "contents of /proc/cpuinfo:" << endl;
  string line;
@ -249,33 +223,30 @@ void print_cpuinfo()
 }

 template <typename T>
-string type_name()
-{
+string type_name() {
  return "unknown";
 }

-template<>
-string type_name<float>()
-{
+template <>
+string type_name<float>() {
  return "float";
 }

-template<>
-string type_name<double>()
-{
+template <>
+string type_name<double>() {
  return "double";
 }

-struct action_t
-{
-  virtual const char* invokation_name() const { abort(); return nullptr; }
+struct action_t {
+  virtual const char* invokation_name() const {
+    abort();
+    return nullptr;
+  }
  virtual void run() const { abort(); }
  virtual ~action_t() {}
 };

-void show_usage_and_exit(int /*argc*/, char* argv[],
-                         const vector<unique_ptr<action_t>>& available_actions)
-{
+void show_usage_and_exit(int /*argc*/, char* argv[], const vector<unique_ptr<action_t>>& available_actions) {
  cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl;
  cerr << "available actions:" << endl << endl;
  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
@ -293,11 +264,10 @@ void show_usage_and_exit(int /*argc*/, char* argv[],
  cerr << "       avoid warm caches." << endl;
  exit(1);
 }
-     
-float measure_clock_speed()
-{
+
+float measure_clock_speed() {
  cerr << "Measuring clock speed...                              \r" << flush;
-          
+
  vector<float> all_gflops;
  for (int i = 0; i < 8; i++) {
    benchmark_t b(1024, 1024, 1024);
@ -315,14 +285,12 @@ float measure_clock_speed()
  return result;
 }

-struct human_duration_t
-{
+struct human_duration_t {
  int seconds;
  human_duration_t(int s) : seconds(s) {}
 };

-ostream& operator<<(ostream& s, const human_duration_t& d)
-{
+ostream& operator<<(ostream& s, const human_duration_t& d) {
  int remainder = d.seconds;
  if (remainder > 3600) {
    int hours = remainder / 3600;
@ -342,8 +310,7 @@ ostream& operator<<(ostream& s, const human_duration_t& d)

 const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data";

-void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run)
-{
+void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run) {
  FILE* file = fopen(filename, "w");
  if (!file) {
    cerr << "Could not open file " << filename << " for writing." << endl;
@ -358,8 +325,7 @@ void serialize_benchmarks(const char* filename, const vector<benchmark_t>& bench
  fclose(file);
 }

-bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run)
-{
+bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run) {
  FILE* file = fopen(filename, "r");
  if (!file) {
    return false;
@ -382,11 +348,7 @@ bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmark
  return true;
 }

-void try_run_some_benchmarks(
-  vector<benchmark_t>& benchmarks,
-  double time_start,
-  size_t& first_benchmark_to_run)
-{
+void try_run_some_benchmarks(vector<benchmark_t>& benchmarks, double time_start, size_t& first_benchmark_to_run) {
  if (first_benchmark_to_run == benchmarks.size()) {
    return;
  }
@ -402,9 +364,7 @@ void try_run_some_benchmarks(
    time_now = timer.getRealTime();

    // We check clock speed every minute and at the end.
-    if (benchmark_index == benchmarks.size() ||
-        time_now > time_last_clock_speed_measurement + 60.0f)
-    {
+    if (benchmark_index == benchmarks.size() || time_now > time_last_clock_speed_measurement + 60.0f) {
      time_last_clock_speed_measurement = time_now;

      // Ensure that clock speed is as expected
@ -425,8 +385,7 @@ void try_run_some_benchmarks(
        // which invalidates all benchmark results collected so far.
        // Either way, we better restart all over again now.
        if (benchmark_index) {
-          cerr << "Restarting at " << 100.0f * ratio_done
-               << " % because clock speed increased.          " << endl;
+          cerr << "Restarting at " << 100.0f * ratio_done << " % because clock speed increased.          " << endl;
        }
        max_clock_speed = current_clock_speed;
        first_benchmark_to_run = 0;
@ -436,12 +395,9 @@ void try_run_some_benchmarks(
      bool rerun_last_tests = false;

      if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
-        cerr << "Measurements completed so far: "
-             << 100.0f * ratio_done
-             << " %                             " << endl;
-        cerr << "Clock speed seems to be only "
-             << current_clock_speed/max_clock_speed
-             << " times what it used to be." << endl;
+        cerr << "Measurements completed so far: " << 100.0f * ratio_done << " %                             " << endl;
+        cerr << "Clock speed seems to be only " << current_clock_speed / max_clock_speed << " times what it used to be."
+             << endl;

        unsigned int seconds_to_sleep_if_lower_clock_speed = 1;

@ -454,9 +410,8 @@ void try_run_some_benchmarks(
            exit(2);
          }
          rerun_last_tests = true;
-          cerr << "Sleeping "
-               << seconds_to_sleep_if_lower_clock_speed
-               << " s...                                   \r" << endl;
+          cerr << "Sleeping " << seconds_to_sleep_if_lower_clock_speed << " s...                                   \r"
+               << endl;
          sleep(seconds_to_sleep_if_lower_clock_speed);
          current_clock_speed = measure_clock_speed();
          seconds_to_sleep_if_lower_clock_speed *= 2;
@ -464,8 +419,7 @@ void try_run_some_benchmarks(
      }

      if (rerun_last_tests) {
-        cerr << "Redoing the last "
-             << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
+        cerr << "Redoing the last " << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
             << " % because clock speed had been low.   " << endl;
        return;
      }
@ -486,8 +440,7 @@ void try_run_some_benchmarks(
    // Display progress info on stderr
    if (time_now > time_last_progress_update + 1.0f) {
      time_last_progress_update = time_now;
-      cerr << "Measurements... " << 100.0f * ratio_done
-           << " %, ETA "
+      cerr << "Measurements... " << 100.0f * ratio_done << " %, ETA "
           << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done)
           << "                          \r" << flush;
    }
@ -498,19 +451,15 @@ void try_run_some_benchmarks(
  }
 }

-void run_benchmarks(vector<benchmark_t>& benchmarks)
-{
+void run_benchmarks(vector<benchmark_t>& benchmarks) {
  size_t first_benchmark_to_run;
  vector<benchmark_t> deserialized_benchmarks;
  bool use_deserialized_benchmarks = false;
  if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) {
-    cerr << "Found serialized session with "
-         << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
+    cerr << "Found serialized session with " << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
         << " % already done" << endl;
-    if (deserialized_benchmarks.size() == benchmarks.size() &&
-        first_benchmark_to_run > 0 &&
-        first_benchmark_to_run < benchmarks.size())
-    {
+    if (deserialized_benchmarks.size() == benchmarks.size() && first_benchmark_to_run > 0 &&
+        first_benchmark_to_run < benchmarks.size()) {
      use_deserialized_benchmarks = true;
    }
  }
@ -531,15 +480,13 @@ void run_benchmarks(vector<benchmark_t>& benchmarks)
  for (int i = 0; i < 4; i++) {
    max_clock_speed = max(max_clock_speed, measure_clock_speed());
  }
-  
+
  double time_start = 0.0;
  while (first_benchmark_to_run < benchmarks.size()) {
    if (first_benchmark_to_run == 0) {
      time_start = timer.getRealTime();
    }
-    try_run_some_benchmarks(benchmarks,
-                            time_start,
-                            first_benchmark_to_run);
+    try_run_some_benchmarks(benchmarks, time_start, first_benchmark_to_run);
  }

  // Sort timings by increasing benchmark parameters, and decreasing gflops.
@ -550,10 +497,8 @@ void run_benchmarks(vector<benchmark_t>& benchmarks)
  // Collect best (i.e. now first) results for each parameter values.
  vector<benchmark_t> best_benchmarks;
  for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
-    if (best_benchmarks.empty() ||
-        best_benchmarks.back().compact_product_size != it->compact_product_size ||
-        best_benchmarks.back().compact_block_size != it->compact_block_size)
-    {
+    if (best_benchmarks.empty() || best_benchmarks.back().compact_product_size != it->compact_product_size ||
+        best_benchmarks.back().compact_block_size != it->compact_block_size) {
      best_benchmarks.push_back(*it);
    }
  }
@ -562,11 +507,9 @@ void run_benchmarks(vector<benchmark_t>& benchmarks)
  benchmarks = best_benchmarks;
 }

-struct measure_all_pot_sizes_action_t : action_t
-{
+struct measure_all_pot_sizes_action_t : action_t {
  virtual const char* invokation_name() const { return "all-pot-sizes"; }
-  virtual void run() const
-  {
+  virtual void run() const {
    vector<benchmark_t> benchmarks;
    for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
      for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
@ -593,11 +536,9 @@ struct measure_all_pot_sizes_action_t : action_t
  }
 };

-struct measure_default_sizes_action_t : action_t
-{
+struct measure_default_sizes_action_t : action_t {
  virtual const char* invokation_name() const { return "default-sizes"; }
-  virtual void run() const
-  {
+  virtual void run() const {
    vector<benchmark_t> benchmarks;
    for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
      for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
@ -618,8 +559,7 @@ struct measure_default_sizes_action_t : action_t
  }
 };

-int main(int argc, char* argv[])
-{
+int main(int argc, char* argv[]) {
  double time_start = timer.getRealTime();
  cout.precision(4);
  cerr.precision(4);
@ -647,7 +587,7 @@ int main(int argc, char* argv[])
  for (int i = 2; i < argc; i++) {
    if (argv[i] == strstr(argv[i], "--min-working-set-size=")) {
      const char* equals_sign = strchr(argv[i], '=');
-      min_working_set_size = strtoul(equals_sign+1, nullptr, 10);
+      min_working_set_size = strtoul(equals_sign + 1, nullptr, 10);
    } else {
      cerr << "unrecognized option: " << argv[i] << endl << endl;
      show_usage_and_exit(argc, argv, available_actions);
@ -657,7 +597,7 @@ int main(int argc, char* argv[])
  print_cpuinfo();

  cout << "benchmark parameters:" << endl;
-  cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl;
+  cout << "pointer size: " << 8 * sizeof(void*) << " bits" << endl;
  cout << "scalar type: " << type_name<Scalar>() << endl;
  cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl;
  cout << "minsize = " << minsize << endl;
--- a/bench/benchmark.cpp
+++ b/bench/benchmark.cpp
@ -19,21 +19,18 @@ using namespace Eigen;
 #define SCALAR double
 #endif

-int main(int argc, char *argv[])
-{
-    Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones();
-    Matrix<SCALAR,MATSIZE,MATSIZE> m;
-    for(int i = 0; i < MATSIZE; i++)
-        for(int j = 0; j < MATSIZE; j++)
-        {
-            m(i,j) = (i+MATSIZE*j);
-        }
-    asm("#begin");
-    for(int a = 0; a < REPEAT; a++)
-    {
-        m = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones() + 0.00005 * (m + (m*m));
+int main(int argc, char *argv[]) {
+  Matrix<SCALAR, MATSIZE, MATSIZE> I = Matrix<SCALAR, MATSIZE, MATSIZE>::Ones();
+  Matrix<SCALAR, MATSIZE, MATSIZE> m;
+  for (int i = 0; i < MATSIZE; i++)
+    for (int j = 0; j < MATSIZE; j++) {
+      m(i, j) = (i + MATSIZE * j);
    }
-    asm("#end");
-    cout << m << endl;
-    return 0;
+  asm("#begin");
+  for (int a = 0; a < REPEAT; a++) {
+    m = Matrix<SCALAR, MATSIZE, MATSIZE>::Ones() + 0.00005 * (m + (m * m));
+  }
+  asm("#end");
+  cout << m << endl;
+  return 0;
 }
--- a/bench/benchmarkSlice.cpp
+++ b/bench/benchmarkSlice.cpp
@ -15,23 +15,21 @@ using namespace Eigen;
 #define SCALAR float
 #endif

-int main(int argc, char *argv[])
-{
+int main(int argc, char *argv[]) {
  typedef Matrix<SCALAR, Eigen::Dynamic, Eigen::Dynamic> Mat;
  Mat m(100, 100);
  m.setRandom();

-  for(int a = 0; a < REPEAT; a++)
-  {
+  for (int a = 0; a < REPEAT; a++) {
    int r, c, nr, nc;
-    r = Eigen::internal::random<int>(0,10);
-    c = Eigen::internal::random<int>(0,10);
-    nr = Eigen::internal::random<int>(50,80);
-    nc = Eigen::internal::random<int>(50,80);
-    m.block(r,c,nr,nc) += Mat::Ones(nr,nc);
-    m.block(r,c,nr,nc) *= SCALAR(10);
-    m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10);
-    m.block(r,c,nr,nc) /= SCALAR(10);
+    r = Eigen::internal::random<int>(0, 10);
+    c = Eigen::internal::random<int>(0, 10);
+    nr = Eigen::internal::random<int>(50, 80);
+    nc = Eigen::internal::random<int>(50, 80);
+    m.block(r, c, nr, nc) += Mat::Ones(nr, nc);
+    m.block(r, c, nr, nc) *= SCALAR(10);
+    m.block(r, c, nr, nc) -= Mat::constant(nr, nc, 10);
+    m.block(r, c, nr, nc) /= SCALAR(10);
  }
  cout << m[0] << endl;
  return 0;
--- a/bench/benchmarkX.cpp
+++ b/bench/benchmarkX.cpp
@ -19,18 +19,16 @@ using namespace Eigen;
 #define REPEAT 100
 #endif

-int main(int argc, char *argv[])
-{
-	MATTYPE I = MATTYPE::Ones(MATSIZE,MATSIZE);
-	MATTYPE m(MATSIZE,MATSIZE);
-	for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
-	{
-		m(i,j) = (i+j+1)/(MATSIZE*MATSIZE);
-	}
-	for(int a = 0; a < REPEAT; a++)
-	{
-		m = I + 0.0001 * (m + m*m);
-	}
-	cout << m(0,0) << endl;
-	return 0;
+int main(int argc, char *argv[]) {
+  MATTYPE I = MATTYPE::Ones(MATSIZE, MATSIZE);
+  MATTYPE m(MATSIZE, MATSIZE);
+  for (int i = 0; i < MATSIZE; i++)
+    for (int j = 0; j < MATSIZE; j++) {
+      m(i, j) = (i + j + 1) / (MATSIZE * MATSIZE);
+    }
+  for (int a = 0; a < REPEAT; a++) {
+    m = I + 0.0001 * (m + m * m);
+  }
+  cout << m(0, 0) << endl;
+  return 0;
 }
--- a/bench/benchmarkXcwise.cpp
+++ b/bench/benchmarkXcwise.cpp
@ -18,18 +18,15 @@ using namespace Eigen;
 #define REPEAT 1000
 #endif

-int main(int argc, char *argv[])
-{
-	VECTYPE I = VECTYPE::Ones(VECSIZE);
-	VECTYPE m(VECSIZE,1);
-	for(int i = 0; i < VECSIZE; i++)
-	{
-		m[i] = 0.1 * i/VECSIZE;
-	}
-	for(int a = 0; a < REPEAT; a++)
-	{
-		m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4);
-	}
-	cout << m[0] << endl;
-	return 0;
+int main(int argc, char *argv[]) {
+  VECTYPE I = VECTYPE::Ones(VECSIZE);
+  VECTYPE m(VECSIZE, 1);
+  for (int i = 0; i < VECSIZE; i++) {
+    m[i] = 0.1 * i / VECSIZE;
+  }
+  for (int a = 0; a < REPEAT; a++) {
+    m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m / 4);
+  }
+  cout << m[0] << endl;
+  return 0;
 }
--- a/bench/btl/actions/action_aat_product.hh
+++ b/bench/btl/actions/action_aat_product.hh
@ -28,101 +28,80 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_aat_product {
-
-public :
-
+ public:
  // Ctor

-  Action_aat_product( int size ):_size(size)
-  {
+  Action_aat_product(int size) : _size(size) {
    MESSAGE("Action_aat_product Ctor");

    // STL matrix and vector initialization

-    init_matrix<pseudo_random>(A_stl,_size);
-    init_matrix<null_function>(X_stl,_size);
-    init_matrix<null_function>(resu_stl,_size);
+    init_matrix<pseudo_random>(A_stl, _size);
+    init_matrix<null_function>(X_stl, _size);
+    init_matrix<null_function>(resu_stl, _size);

    // generic matrix and vector initialization

-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::matrix_from_stl(X_ref,X_stl);
-
-    Interface::matrix_from_stl(A,A_stl);
-    Interface::matrix_from_stl(X,X_stl);
+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);

+    Interface::matrix_from_stl(A, A_stl);
+    Interface::matrix_from_stl(X, X_stl);
  }

  // invalidate copy ctor

-  Action_aat_product( const  Action_aat_product & )
-  {
+  Action_aat_product(const Action_aat_product&) {
    INFOS("illegal call to Action_aat_product Copy Ctor");
    exit(0);
  }

  // Dtor

-  ~Action_aat_product( void ){
-
+  ~Action_aat_product(void) {
    MESSAGE("Action_aat_product Dtor");

    // deallocation

-    Interface::free_matrix(A,_size);
-    Interface::free_matrix(X,_size);
-
-    Interface::free_matrix(A_ref,_size);
-    Interface::free_matrix(X_ref,_size);
+    Interface::free_matrix(A, _size);
+    Interface::free_matrix(X, _size);

+    Interface::free_matrix(A_ref, _size);
+    Interface::free_matrix(X_ref, _size);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "aat_"+Interface::name();
+  static inline std::string name(void) { return "aat_" + Interface::name(); }
+
+  double nb_op_base(void) { return double(_size) * double(_size) * double(_size); }
+
+  inline void initialize(void) {
+    Interface::copy_matrix(A_ref, A, _size);
+    Interface::copy_matrix(X_ref, X, _size);
  }

-  double nb_op_base( void ){
-    return double(_size)*double(_size)*double(_size);
-  }
+  inline void calculate(void) { Interface::aat_product(A, X, _size); }

-  inline void initialize( void ){
-
-    Interface::copy_matrix(A_ref,A,_size);
-    Interface::copy_matrix(X_ref,X,_size);
-
-  }
-
-  inline void calculate( void ) {
-
-      Interface::aat_product(A,X,_size);
-
-  }
-
-  void check_result( void ){
-    if (_size>128) return;
+  void check_result(void) {
+    if (_size > 128) return;
    // calculation check

-    Interface::matrix_to_stl(X,resu_stl);
+    Interface::matrix_to_stl(X, resu_stl);

-    STL_interface<typename Interface::real_type>::aat_product(A_stl,X_stl,_size);
+    STL_interface<typename Interface::real_type>::aat_product(A_stl, X_stl, _size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);

-    if (error>1.e-6){
+    if (error > 1.e-6) {
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(1);
    }
-
  }

-private :
-
+ private:
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix resu_stl;
@ -133,13 +112,7 @@ private :
  typename Interface::gene_matrix A;
  typename Interface::gene_matrix X;

-
  int _size;
-
 };

-
 #endif
-
-
-
--- a/bench/btl/actions/action_ata_product.hh
+++ b/bench/btl/actions/action_ata_product.hh
@ -28,101 +28,80 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_ata_product {
-
-public :
-
+ public:
  // Ctor

-  Action_ata_product( int size ):_size(size)
-  {
+  Action_ata_product(int size) : _size(size) {
    MESSAGE("Action_ata_product Ctor");

    // STL matrix and vector initialization

-    init_matrix<pseudo_random>(A_stl,_size);
-    init_matrix<null_function>(X_stl,_size);
-    init_matrix<null_function>(resu_stl,_size);
+    init_matrix<pseudo_random>(A_stl, _size);
+    init_matrix<null_function>(X_stl, _size);
+    init_matrix<null_function>(resu_stl, _size);

    // generic matrix and vector initialization

-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::matrix_from_stl(X_ref,X_stl);
-
-    Interface::matrix_from_stl(A,A_stl);
-    Interface::matrix_from_stl(X,X_stl);
+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);

+    Interface::matrix_from_stl(A, A_stl);
+    Interface::matrix_from_stl(X, X_stl);
  }

  // invalidate copy ctor

-  Action_ata_product( const  Action_ata_product & )
-  {
+  Action_ata_product(const Action_ata_product&) {
    INFOS("illegal call to Action_ata_product Copy Ctor");
    exit(0);
  }

  // Dtor

-  ~Action_ata_product( void ){
-
+  ~Action_ata_product(void) {
    MESSAGE("Action_ata_product Dtor");

    // deallocation

-    Interface::free_matrix(A,_size);
-    Interface::free_matrix(X,_size);
-
-    Interface::free_matrix(A_ref,_size);
-    Interface::free_matrix(X_ref,_size);
+    Interface::free_matrix(A, _size);
+    Interface::free_matrix(X, _size);

+    Interface::free_matrix(A_ref, _size);
+    Interface::free_matrix(X_ref, _size);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "ata_"+Interface::name();
+  static inline std::string name(void) { return "ata_" + Interface::name(); }
+
+  double nb_op_base(void) { return 2.0 * _size * _size * _size; }
+
+  inline void initialize(void) {
+    Interface::copy_matrix(A_ref, A, _size);
+    Interface::copy_matrix(X_ref, X, _size);
  }

-  double nb_op_base( void ){
-    return 2.0*_size*_size*_size;
-  }
+  inline void calculate(void) { Interface::ata_product(A, X, _size); }

-  inline void initialize( void ){
-
-    Interface::copy_matrix(A_ref,A,_size);
-    Interface::copy_matrix(X_ref,X,_size);
-
-  }
-
-  inline void calculate( void ) {
-
-      Interface::ata_product(A,X,_size);
-
-  }
-
-  void check_result( void ){
-    if (_size>128) return;
+  void check_result(void) {
+    if (_size > 128) return;
    // calculation check

-    Interface::matrix_to_stl(X,resu_stl);
+    Interface::matrix_to_stl(X, resu_stl);

-    STL_interface<typename Interface::real_type>::ata_product(A_stl,X_stl,_size);
+    STL_interface<typename Interface::real_type>::ata_product(A_stl, X_stl, _size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);

-    if (error>1.e-6){
+    if (error > 1.e-6) {
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(1);
    }
-
  }

-private :
-
+ private:
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix resu_stl;
@ -133,13 +112,7 @@ private :
  typename Interface::gene_matrix A;
  typename Interface::gene_matrix X;

-
  int _size;
-
 };

-
 #endif
-
-
-
--- a/bench/btl/actions/action_atv_product.hh
+++ b/bench/btl/actions/action_atv_product.hh
@ -28,87 +28,79 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_atv_product {
-
-public :
-
-  Action_atv_product( int size ) : _size(size)
-  {
+ public:
+  Action_atv_product(int size) : _size(size) {
    MESSAGE("Action_atv_product Ctor");

    // STL matrix and vector initialization

-    init_matrix<pseudo_random>(A_stl,_size);
-    init_vector<pseudo_random>(B_stl,_size);
-    init_vector<null_function>(X_stl,_size);
-    init_vector<null_function>(resu_stl,_size);
+    init_matrix<pseudo_random>(A_stl, _size);
+    init_vector<pseudo_random>(B_stl, _size);
+    init_vector<null_function>(X_stl, _size);
+    init_vector<null_function>(resu_stl, _size);

    // generic matrix and vector initialization

-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::vector_from_stl(B_ref,B_stl);
-    Interface::vector_from_stl(X_ref,X_stl);
+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::vector_from_stl(B_ref, B_stl);
+    Interface::vector_from_stl(X_ref, X_stl);

-    Interface::matrix_from_stl(A,A_stl);
-    Interface::vector_from_stl(B,B_stl);
-    Interface::vector_from_stl(X,X_stl);
+    Interface::matrix_from_stl(A, A_stl);
+    Interface::vector_from_stl(B, B_stl);
+    Interface::vector_from_stl(X, X_stl);
  }

  // invalidate copy ctor
-  Action_atv_product( const  Action_atv_product & )
-  {
+  Action_atv_product(const Action_atv_product&) {
    INFOS("illegal call to Action_atv_product Copy Ctor");
    exit(1);
  }

-  ~Action_atv_product( void )
-  {
+  ~Action_atv_product(void) {
    MESSAGE("Action_atv_product Dtor");

-    Interface::free_matrix(A,_size);
+    Interface::free_matrix(A, _size);
    Interface::free_vector(B);
    Interface::free_vector(X);

-    Interface::free_matrix(A_ref,_size);
+    Interface::free_matrix(A_ref, _size);
    Interface::free_vector(B_ref);
    Interface::free_vector(X_ref);
  }

  static inline std::string name() { return "atv_" + Interface::name(); }

-  double nb_op_base( void ) { return 2.0*_size*_size; }
+  double nb_op_base(void) { return 2.0 * _size * _size; }

-  inline void initialize( void ){
-    Interface::copy_matrix(A_ref,A,_size);
-    Interface::copy_vector(B_ref,B,_size);
-    Interface::copy_vector(X_ref,X,_size);
+  inline void initialize(void) {
+    Interface::copy_matrix(A_ref, A, _size);
+    Interface::copy_vector(B_ref, B, _size);
+    Interface::copy_vector(X_ref, X, _size);
  }

-  BTL_DONT_INLINE void calculate( void ) {
+  BTL_DONT_INLINE void calculate(void) {
    BTL_ASM_COMMENT("begin atv");
-    Interface::atv_product(A,B,X,_size);
+    Interface::atv_product(A, B, X, _size);
    BTL_ASM_COMMENT("end atv");
  }

-  void check_result( void )
-  {
-    if (_size>128) return;
-    Interface::vector_to_stl(X,resu_stl);
+  void check_result(void) {
+    if (_size > 128) return;
+    Interface::vector_to_stl(X, resu_stl);

-    STL_interface<typename Interface::real_type>::atv_product(A_stl,B_stl,X_stl,_size);
+    STL_interface<typename Interface::real_type>::atv_product(A_stl, B_stl, X_stl, _size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);

-    if (error>1.e-6){
+    if (error > 1.e-6) {
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(1);
    }
  }

-private :
-
+ private:
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_vector B_stl;
  typename Interface::stl_vector X_stl;
@ -122,13 +114,7 @@ private :
  typename Interface::gene_vector B;
  typename Interface::gene_vector X;

-
  int _size;
-
 };

-
 #endif
-
-
-
--- a/bench/btl/actions/action_axpby.hh
+++ b/bench/btl/actions/action_axpby.hh
@ -27,38 +27,34 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_axpby {
-
-public :
-
+ public:
  // Ctor
-  Action_axpby( int size ):_alpha(0.5),_beta(0.95),_size(size)
-  {
+  Action_axpby(int size) : _alpha(0.5), _beta(0.95), _size(size) {
    MESSAGE("Action_axpby Ctor");

    // STL vector initialization
-    init_vector<pseudo_random>(X_stl,_size);
-    init_vector<pseudo_random>(Y_stl,_size);
-    init_vector<null_function>(resu_stl,_size);
+    init_vector<pseudo_random>(X_stl, _size);
+    init_vector<pseudo_random>(Y_stl, _size);
+    init_vector<null_function>(resu_stl, _size);

    // generic matrix and vector initialization
-    Interface::vector_from_stl(X_ref,X_stl);
-    Interface::vector_from_stl(Y_ref,Y_stl);
+    Interface::vector_from_stl(X_ref, X_stl);
+    Interface::vector_from_stl(Y_ref, Y_stl);

-    Interface::vector_from_stl(X,X_stl);
-    Interface::vector_from_stl(Y,Y_stl);
+    Interface::vector_from_stl(X, X_stl);
+    Interface::vector_from_stl(Y, Y_stl);
  }

  // invalidate copy ctor
-  Action_axpby( const  Action_axpby & )
-  {
+  Action_axpby(const Action_axpby&) {
    INFOS("illegal call to Action_axpby Copy Ctor");
    exit(1);
  }

  // Dtor
-  ~Action_axpby( void ){
+  ~Action_axpby(void) {
    MESSAGE("Action_axpby Dtor");

    // deallocation
@ -70,44 +66,37 @@ public :
  }

  // action name
-  static inline std::string name( void )
-  {
-    return "axpby_"+Interface::name();
+  static inline std::string name(void) { return "axpby_" + Interface::name(); }
+
+  double nb_op_base(void) { return 3.0 * _size; }
+
+  inline void initialize(void) {
+    Interface::copy_vector(X_ref, X, _size);
+    Interface::copy_vector(Y_ref, Y, _size);
  }

-  double nb_op_base( void ){
-    return 3.0*_size;
-  }
-
-  inline void initialize( void ){
-    Interface::copy_vector(X_ref,X,_size);
-    Interface::copy_vector(Y_ref,Y,_size);
-  }
-
-  inline void calculate( void ) {
+  inline void calculate(void) {
    BTL_ASM_COMMENT("mybegin axpby");
-    Interface::axpby(_alpha,X,_beta,Y,_size);
+    Interface::axpby(_alpha, X, _beta, Y, _size);
    BTL_ASM_COMMENT("myend axpby");
  }

-  void check_result( void ){
-    if (_size>128) return;
+  void check_result(void) {
+    if (_size > 128) return;
    // calculation check
-    Interface::vector_to_stl(Y,resu_stl);
+    Interface::vector_to_stl(Y, resu_stl);

-    STL_interface<typename Interface::real_type>::axpby(_alpha,X_stl,_beta,Y_stl,_size);
+    STL_interface<typename Interface::real_type>::axpby(_alpha, X_stl, _beta, Y_stl, _size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Y_stl, resu_stl);

-    if (error>1.e-6){
+    if (error > 1.e-6) {
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(2);
    }
  }

-private :
-
+ private:
  typename Interface::stl_vector X_stl;
  typename Interface::stl_vector Y_stl;
  typename Interface::stl_vector resu_stl;
--- a/bench/btl/actions/action_axpy.hh
+++ b/bench/btl/actions/action_axpy.hh
@ -28,46 +28,39 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_axpy {
-
-public :
-
+ public:
  // Ctor

-  Action_axpy( int size ):_coef(1.0),_size(size)
-  {
+  Action_axpy(int size) : _coef(1.0), _size(size) {
    MESSAGE("Action_axpy Ctor");

    // STL vector initialization

-    init_vector<pseudo_random>(X_stl,_size);
-    init_vector<pseudo_random>(Y_stl,_size);
-    init_vector<null_function>(resu_stl,_size);
+    init_vector<pseudo_random>(X_stl, _size);
+    init_vector<pseudo_random>(Y_stl, _size);
+    init_vector<null_function>(resu_stl, _size);

    // generic matrix and vector initialization

-    Interface::vector_from_stl(X_ref,X_stl);
-    Interface::vector_from_stl(Y_ref,Y_stl);
-
-    Interface::vector_from_stl(X,X_stl);
-    Interface::vector_from_stl(Y,Y_stl);
-
+    Interface::vector_from_stl(X_ref, X_stl);
+    Interface::vector_from_stl(Y_ref, Y_stl);

+    Interface::vector_from_stl(X, X_stl);
+    Interface::vector_from_stl(Y, Y_stl);
  }

  // invalidate copy ctor

-  Action_axpy( const  Action_axpy & )
-  {
+  Action_axpy(const Action_axpy&) {
    INFOS("illegal call to Action_axpy Copy Ctor");
    exit(1);
  }

  // Dtor

-  ~Action_axpy( void ){
-
+  ~Action_axpy(void) {
    MESSAGE("Action_axpy Dtor");

    // deallocation
@ -81,46 +74,38 @@ public :

  // action name

-  static inline std::string name( void )
-  {
-    return "axpy_"+Interface::name();
+  static inline std::string name(void) { return "axpy_" + Interface::name(); }
+
+  double nb_op_base(void) { return 2.0 * _size; }
+
+  inline void initialize(void) {
+    Interface::copy_vector(X_ref, X, _size);
+    Interface::copy_vector(Y_ref, Y, _size);
  }

-  double nb_op_base( void ){
-    return 2.0*_size;
-  }
-
-  inline void initialize( void ){
-    Interface::copy_vector(X_ref,X,_size);
-    Interface::copy_vector(Y_ref,Y,_size);
-  }
-
-  inline void calculate( void ) {
+  inline void calculate(void) {
    BTL_ASM_COMMENT("mybegin axpy");
-    Interface::axpy(_coef,X,Y,_size);
+    Interface::axpy(_coef, X, Y, _size);
    BTL_ASM_COMMENT("myend axpy");
  }

-  void check_result( void ){
-    if (_size>128) return;
+  void check_result(void) {
+    if (_size > 128) return;
    // calculation check

-    Interface::vector_to_stl(Y,resu_stl);
+    Interface::vector_to_stl(Y, resu_stl);

-    STL_interface<typename Interface::real_type>::axpy(_coef,X_stl,Y_stl,_size);
+    STL_interface<typename Interface::real_type>::axpy(_coef, X_stl, Y_stl, _size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Y_stl, resu_stl);

-    if (error>1.e-6){
+    if (error > 1.e-6) {
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(0);
    }
-
  }

-private :
-
+ private:
  typename Interface::stl_vector X_stl;
  typename Interface::stl_vector Y_stl;
  typename Interface::stl_vector resu_stl;
--- a/bench/btl/actions/action_cholesky.hh
+++ b/bench/btl/actions/action_cholesky.hh
@ -27,93 +27,75 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_cholesky {
-
-public :
-
+ public:
  // Ctor

-  Action_cholesky( int size ):_size(size)
-  {
+  Action_cholesky(int size) : _size(size) {
    MESSAGE("Action_cholesky Ctor");

    // STL mat/vec initialization
-    init_matrix_symm<pseudo_random>(X_stl,_size);
-    init_matrix<null_function>(C_stl,_size);
+    init_matrix_symm<pseudo_random>(X_stl, _size);
+    init_matrix<null_function>(C_stl, _size);

    // make sure X is invertible
-    for (int i=0; i<_size; ++i)
-      X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100;
+    for (int i = 0; i < _size; ++i) X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100;

    // generic matrix and vector initialization
-    Interface::matrix_from_stl(X_ref,X_stl);
-    Interface::matrix_from_stl(X,X_stl);
-    Interface::matrix_from_stl(C,C_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);
+    Interface::matrix_from_stl(X, X_stl);
+    Interface::matrix_from_stl(C, C_stl);

    _cost = 0;
-    for (int j=0; j<_size; ++j)
-    {
-      double r = std::max(_size - j -1,0);
-      _cost += 2*(r*j+r+j);
+    for (int j = 0; j < _size; ++j) {
+      double r = std::max(_size - j - 1, 0);
+      _cost += 2 * (r * j + r + j);
    }
  }

  // invalidate copy ctor

-  Action_cholesky( const  Action_cholesky & )
-  {
+  Action_cholesky(const Action_cholesky&) {
    INFOS("illegal call to Action_cholesky Copy Ctor");
    exit(1);
  }

  // Dtor

-  ~Action_cholesky( void ){
-
+  ~Action_cholesky(void) {
    MESSAGE("Action_cholesky Dtor");

    // deallocation
-    Interface::free_matrix(X_ref,_size);
-    Interface::free_matrix(X,_size);
-    Interface::free_matrix(C,_size);
+    Interface::free_matrix(X_ref, _size);
+    Interface::free_matrix(X, _size);
+    Interface::free_matrix(C, _size);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "cholesky_"+Interface::name();
-  }
+  static inline std::string name(void) { return "cholesky_" + Interface::name(); }

-  double nb_op_base( void ){
-    return _cost;
-  }
+  double nb_op_base(void) { return _cost; }

-  inline void initialize( void ){
-    Interface::copy_matrix(X_ref,X,_size);
-  }
+  inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }

-  inline void calculate( void ) {
-      Interface::cholesky(X,C,_size);
-  }
+  inline void calculate(void) { Interface::cholesky(X, C, _size); }

-  void check_result( void ){
+  void check_result(void) {
    // calculation check
-//     STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
-//
-//     typename Interface::real_type error=
-//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
-//
-//     if (error>1.e-6){
-//       INFOS("WRONG CALCULATION...residual=" << error);
-//       exit(0);
-//     }
-
+    //     STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
+    //
+    //     typename Interface::real_type error=
+    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
+    //
+    //     if (error>1.e-6){
+    //       INFOS("WRONG CALCULATION...residual=" << error);
+    //       exit(0);
+    //     }
  }

-private :
-
+ private:
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix C_stl;

--- a/bench/btl/actions/action_ger.hh
+++ b/bench/btl/actions/action_ger.hh
@ -23,91 +23,78 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_ger {
-
-public :
-
+ public:
  // Ctor
-  BTL_DONT_INLINE Action_ger( int size ):_size(size)
-  {
+  BTL_DONT_INLINE Action_ger(int size) : _size(size) {
    MESSAGE("Action_ger Ctor");

    // STL matrix and vector initialization
    typename Interface::stl_matrix tmp;
-    init_matrix<pseudo_random>(A_stl,_size);
-    init_vector<pseudo_random>(B_stl,_size);
-    init_vector<pseudo_random>(X_stl,_size);
-    init_vector<null_function>(resu_stl,_size);
+    init_matrix<pseudo_random>(A_stl, _size);
+    init_vector<pseudo_random>(B_stl, _size);
+    init_vector<pseudo_random>(X_stl, _size);
+    init_vector<null_function>(resu_stl, _size);

    // generic matrix and vector initialization
-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::matrix_from_stl(A,A_stl);
-    Interface::vector_from_stl(B_ref,B_stl);
-    Interface::vector_from_stl(B,B_stl);
-    Interface::vector_from_stl(X_ref,X_stl);
-    Interface::vector_from_stl(X,X_stl);
+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::matrix_from_stl(A, A_stl);
+    Interface::vector_from_stl(B_ref, B_stl);
+    Interface::vector_from_stl(B, B_stl);
+    Interface::vector_from_stl(X_ref, X_stl);
+    Interface::vector_from_stl(X, X_stl);
  }

  // invalidate copy ctor
-  Action_ger( const  Action_ger & )
-  {
+  Action_ger(const Action_ger&) {
    INFOS("illegal call to Action_ger Copy Ctor");
    exit(1);
  }

  // Dtor
-  BTL_DONT_INLINE ~Action_ger( void ){
+  BTL_DONT_INLINE ~Action_ger(void) {
    MESSAGE("Action_ger Dtor");
-    Interface::free_matrix(A,_size);
+    Interface::free_matrix(A, _size);
    Interface::free_vector(B);
    Interface::free_vector(X);
-    Interface::free_matrix(A_ref,_size);
+    Interface::free_matrix(A_ref, _size);
    Interface::free_vector(B_ref);
    Interface::free_vector(X_ref);
-
  }

  // action name
-  static inline std::string name( void )
-  {
-    return "ger_" + Interface::name();
+  static inline std::string name(void) { return "ger_" + Interface::name(); }
+
+  double nb_op_base(void) { return 2.0 * _size * _size; }
+
+  BTL_DONT_INLINE void initialize(void) {
+    Interface::copy_matrix(A_ref, A, _size);
+    Interface::copy_vector(B_ref, B, _size);
+    Interface::copy_vector(X_ref, X, _size);
  }

-  double nb_op_base( void ){
-    return 2.0*_size*_size;
-  }
-
-  BTL_DONT_INLINE  void initialize( void ){
-    Interface::copy_matrix(A_ref,A,_size);
-    Interface::copy_vector(B_ref,B,_size);
-    Interface::copy_vector(X_ref,X,_size);
-  }
-
-  BTL_DONT_INLINE void calculate( void ) {
+  BTL_DONT_INLINE void calculate(void) {
    BTL_ASM_COMMENT("#begin ger");
-    Interface::ger(A,B,X,_size);
+    Interface::ger(A, B, X, _size);
    BTL_ASM_COMMENT("end ger");
  }

-  BTL_DONT_INLINE void check_result( void ){
+  BTL_DONT_INLINE void check_result(void) {
    // calculation check
-    Interface::vector_to_stl(X,resu_stl);
+    Interface::vector_to_stl(X, resu_stl);

-    STL_interface<typename Interface::real_type>::ger(A_stl,B_stl,X_stl,_size);
+    STL_interface<typename Interface::real_type>::ger(A_stl, B_stl, X_stl, _size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);

-    if (error>1.e-3){
+    if (error > 1.e-3) {
      INFOS("WRONG CALCULATION...residual=" << error);
-//       exit(0);
+      //       exit(0);
    }
-
  }

-private :
-
+ private:
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_vector B_stl;
  typename Interface::stl_vector X_stl;
@ -124,5 +111,4 @@ private :
  int _size;
 };

-
 #endif
--- a/bench/btl/actions/action_hessenberg.hh
+++ b/bench/btl/actions/action_hessenberg.hh
@ -27,94 +27,77 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_hessenberg {
-
-public :
-
+ public:
  // Ctor

-  Action_hessenberg( int size ):_size(size)
-  {
+  Action_hessenberg(int size) : _size(size) {
    MESSAGE("Action_hessenberg Ctor");

    // STL vector initialization
-    init_matrix<pseudo_random>(X_stl,_size);
+    init_matrix<pseudo_random>(X_stl, _size);

-    init_matrix<null_function>(C_stl,_size);
-    init_matrix<null_function>(resu_stl,_size);
+    init_matrix<null_function>(C_stl, _size);
+    init_matrix<null_function>(resu_stl, _size);

    // generic matrix and vector initialization
-    Interface::matrix_from_stl(X_ref,X_stl);
-    Interface::matrix_from_stl(X,X_stl);
-    Interface::matrix_from_stl(C,C_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);
+    Interface::matrix_from_stl(X, X_stl);
+    Interface::matrix_from_stl(C, C_stl);

    _cost = 0;
-    for (int j=0; j<_size-2; ++j)
-    {
-      double r = std::max(0,_size-j-1);
-      double b = std::max(0,_size-j-2);
-      _cost += 6 + 3*b + r*r*4 + r*_size*4;
+    for (int j = 0; j < _size - 2; ++j) {
+      double r = std::max(0, _size - j - 1);
+      double b = std::max(0, _size - j - 2);
+      _cost += 6 + 3 * b + r * r * 4 + r * _size * 4;
    }
  }

  // invalidate copy ctor

-  Action_hessenberg( const  Action_hessenberg & )
-  {
+  Action_hessenberg(const Action_hessenberg&) {
    INFOS("illegal call to Action_hessenberg Copy Ctor");
    exit(1);
  }

  // Dtor

-  ~Action_hessenberg( void ){
-
+  ~Action_hessenberg(void) {
    MESSAGE("Action_hessenberg Dtor");

    // deallocation
-    Interface::free_matrix(X_ref,_size);
-    Interface::free_matrix(X,_size);
-    Interface::free_matrix(C,_size);
+    Interface::free_matrix(X_ref, _size);
+    Interface::free_matrix(X, _size);
+    Interface::free_matrix(C, _size);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "hessenberg_"+Interface::name();
-  }
+  static inline std::string name(void) { return "hessenberg_" + Interface::name(); }

-  double nb_op_base( void ){
-    return _cost;
-  }
+  double nb_op_base(void) { return _cost; }

-  inline void initialize( void ){
-    Interface::copy_matrix(X_ref,X,_size);
-  }
+  inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }

-  inline void calculate( void ) {
-      Interface::hessenberg(X,C,_size);
-  }
+  inline void calculate(void) { Interface::hessenberg(X, C, _size); }

-  void check_result( void ){
+  void check_result(void) {
    // calculation check
-    Interface::matrix_to_stl(C,resu_stl);
-
-//     STL_interface<typename Interface::real_type>::hessenberg(X_stl,C_stl,_size);
-//
-//     typename Interface::real_type error=
-//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
-//
-//     if (error>1.e-6){
-//       INFOS("WRONG CALCULATION...residual=" << error);
-//       exit(0);
-//     }
+    Interface::matrix_to_stl(C, resu_stl);

+    //     STL_interface<typename Interface::real_type>::hessenberg(X_stl,C_stl,_size);
+    //
+    //     typename Interface::real_type error=
+    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
+    //
+    //     if (error>1.e-6){
+    //       INFOS("WRONG CALCULATION...residual=" << error);
+    //       exit(0);
+    //     }
  }

-private :
-
+ private:
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix C_stl;
  typename Interface::stl_matrix resu_stl;
@ -127,97 +110,81 @@ private :
  double _cost;
 };

-template<class Interface>
+template <class Interface>
 class Action_tridiagonalization {
-
-public :
-
+ public:
  // Ctor

-  Action_tridiagonalization( int size ):_size(size)
-  {
+  Action_tridiagonalization(int size) : _size(size) {
    MESSAGE("Action_tridiagonalization Ctor");

    // STL vector initialization
-    init_matrix<pseudo_random>(X_stl,_size);
-    
-    for(int i=0; i<_size; ++i)
-    {
-      for(int j=0; j<i; ++j)
-        X_stl[i][j] = X_stl[j][i];
+    init_matrix<pseudo_random>(X_stl, _size);
+
+    for (int i = 0; i < _size; ++i) {
+      for (int j = 0; j < i; ++j) X_stl[i][j] = X_stl[j][i];
    }
-    
-    init_matrix<null_function>(C_stl,_size);
-    init_matrix<null_function>(resu_stl,_size);
+
+    init_matrix<null_function>(C_stl, _size);
+    init_matrix<null_function>(resu_stl, _size);

    // generic matrix and vector initialization
-    Interface::matrix_from_stl(X_ref,X_stl);
-    Interface::matrix_from_stl(X,X_stl);
-    Interface::matrix_from_stl(C,C_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);
+    Interface::matrix_from_stl(X, X_stl);
+    Interface::matrix_from_stl(C, C_stl);

    _cost = 0;
-    for (int j=0; j<_size-2; ++j)
-    {
-      double r = std::max(0,_size-j-1);
-      double b = std::max(0,_size-j-2);
-      _cost += 6. + 3.*b + r*r*8.;
+    for (int j = 0; j < _size - 2; ++j) {
+      double r = std::max(0, _size - j - 1);
+      double b = std::max(0, _size - j - 2);
+      _cost += 6. + 3. * b + r * r * 8.;
    }
  }

  // invalidate copy ctor

-  Action_tridiagonalization( const  Action_tridiagonalization & )
-  {
+  Action_tridiagonalization(const Action_tridiagonalization&) {
    INFOS("illegal call to Action_tridiagonalization Copy Ctor");
    exit(1);
  }

  // Dtor

-  ~Action_tridiagonalization( void ){
-
+  ~Action_tridiagonalization(void) {
    MESSAGE("Action_tridiagonalization Dtor");

    // deallocation
-    Interface::free_matrix(X_ref,_size);
-    Interface::free_matrix(X,_size);
-    Interface::free_matrix(C,_size);
+    Interface::free_matrix(X_ref, _size);
+    Interface::free_matrix(X, _size);
+    Interface::free_matrix(C, _size);
  }

  // action name

-  static inline std::string name( void ) { return "tridiagonalization_"+Interface::name(); }
+  static inline std::string name(void) { return "tridiagonalization_" + Interface::name(); }

-  double nb_op_base( void ){
-    return _cost;
-  }
+  double nb_op_base(void) { return _cost; }

-  inline void initialize( void ){
-    Interface::copy_matrix(X_ref,X,_size);
-  }
+  inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }

-  inline void calculate( void ) {
-      Interface::tridiagonalization(X,C,_size);
-  }
+  inline void calculate(void) { Interface::tridiagonalization(X, C, _size); }

-  void check_result( void ){
+  void check_result(void) {
    // calculation check
-    Interface::matrix_to_stl(C,resu_stl);
-
-//     STL_interface<typename Interface::real_type>::tridiagonalization(X_stl,C_stl,_size);
-//
-//     typename Interface::real_type error=
-//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
-//
-//     if (error>1.e-6){
-//       INFOS("WRONG CALCULATION...residual=" << error);
-//       exit(0);
-//     }
+    Interface::matrix_to_stl(C, resu_stl);

+    //     STL_interface<typename Interface::real_type>::tridiagonalization(X_stl,C_stl,_size);
+    //
+    //     typename Interface::real_type error=
+    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
+    //
+    //     if (error>1.e-6){
+    //       INFOS("WRONG CALCULATION...residual=" << error);
+    //       exit(0);
+    //     }
  }

-private :
-
+ private:
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix C_stl;
  typename Interface::stl_matrix resu_stl;
--- a/bench/btl/actions/action_lu_decomp.hh
+++ b/bench/btl/actions/action_lu_decomp.hh
@ -27,88 +27,72 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_lu_decomp {
-
-public :
-
+ public:
  // Ctor

-  Action_lu_decomp( int size ):_size(size)
-  {
+  Action_lu_decomp(int size) : _size(size) {
    MESSAGE("Action_lu_decomp Ctor");

    // STL vector initialization
-    init_matrix<pseudo_random>(X_stl,_size);
+    init_matrix<pseudo_random>(X_stl, _size);

-    init_matrix<null_function>(C_stl,_size);
-    init_matrix<null_function>(resu_stl,_size);
+    init_matrix<null_function>(C_stl, _size);
+    init_matrix<null_function>(resu_stl, _size);

    // generic matrix and vector initialization
-    Interface::matrix_from_stl(X_ref,X_stl);
-    Interface::matrix_from_stl(X,X_stl);
-    Interface::matrix_from_stl(C,C_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);
+    Interface::matrix_from_stl(X, X_stl);
+    Interface::matrix_from_stl(C, C_stl);

-    _cost = 2.0*size*size*size/3.0 + size*size;
+    _cost = 2.0 * size * size * size / 3.0 + size * size;
  }

  // invalidate copy ctor

-  Action_lu_decomp( const  Action_lu_decomp & )
-  {
+  Action_lu_decomp(const Action_lu_decomp&) {
    INFOS("illegal call to Action_lu_decomp Copy Ctor");
    exit(1);
  }

  // Dtor

-  ~Action_lu_decomp( void ){
-
+  ~Action_lu_decomp(void) {
    MESSAGE("Action_lu_decomp Dtor");

    // deallocation
-    Interface::free_matrix(X_ref,_size);
-    Interface::free_matrix(X,_size);
-    Interface::free_matrix(C,_size);
+    Interface::free_matrix(X_ref, _size);
+    Interface::free_matrix(X, _size);
+    Interface::free_matrix(C, _size);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "complete_lu_decomp_"+Interface::name();
-  }
+  static inline std::string name(void) { return "complete_lu_decomp_" + Interface::name(); }

-  double nb_op_base( void ){
-    return _cost;
-  }
+  double nb_op_base(void) { return _cost; }

-  inline void initialize( void ){
-    Interface::copy_matrix(X_ref,X,_size);
-  }
+  inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }

-  inline void calculate( void ) {
-      Interface::lu_decomp(X,C,_size);
-  }
+  inline void calculate(void) { Interface::lu_decomp(X, C, _size); }

-  void check_result( void ){
+  void check_result(void) {
    // calculation check
-    Interface::matrix_to_stl(C,resu_stl);
-
-//     STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
-//
-//     typename Interface::real_type error=
-//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
-//
-//     if (error>1.e-6){
-//       INFOS("WRONG CALCULATION...residual=" << error);
-//       exit(0);
-//     }
+    Interface::matrix_to_stl(C, resu_stl);

+    //     STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
+    //
+    //     typename Interface::real_type error=
+    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
+    //
+    //     if (error>1.e-6){
+    //       INFOS("WRONG CALCULATION...residual=" << error);
+    //       exit(0);
+    //     }
  }

-private :
-
+ private:
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix C_stl;
  typename Interface::stl_matrix resu_stl;
--- a/bench/btl/actions/action_lu_solve.hh
+++ b/bench/btl/actions/action_lu_solve.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  action_lu_solve.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef ACTION_LU_SOLVE
 #define ACTION_LU_SOLVE
 #include "utilities.h"
@ -28,33 +28,25 @@

 using namespace std;

-template<class Interface>
-class Action_lu_solve 
-{
+template <class Interface>
+class Action_lu_solve {
+ public:
+  static inline std::string name(void) { return "lu_solve_" + Interface::name(); }

-public :
-
-  static inline std::string name( void )
-  {
-    return "lu_solve_"+Interface::name();
-  }
-  
-  static double nb_op_base(int size){
-    return 2.0*size*size*size/3.0;  // questionable but not really important
+  static double nb_op_base(int size) {
+    return 2.0 * size * size * size / 3.0;  // questionable but not really important
  }

-
-  static double calculate( int nb_calc, int size ) {
-
+  static double calculate(int nb_calc, int size) {
    // STL matrix and vector initialization
-    
+
    typename Interface::stl_matrix A_stl;
    typename Interface::stl_vector B_stl;
    typename Interface::stl_vector X_stl;

-    init_matrix<pseudo_random>(A_stl,size);
-    init_vector<pseudo_random>(B_stl,size);
-    init_vector<null_function>(X_stl,size);
+    init_matrix<pseudo_random>(A_stl, size);
+    init_vector<pseudo_random>(B_stl, size);
+    init_vector<null_function>(X_stl, size);

    // generic matrix and vector initialization

@ -62,18 +54,18 @@ public :
    typename Interface::gene_vector B;
    typename Interface::gene_vector X;

-    typename Interface::gene_matrix LU; 
+    typename Interface::gene_matrix LU;
+
+    Interface::matrix_from_stl(A, A_stl);
+    Interface::vector_from_stl(B, B_stl);
+    Interface::vector_from_stl(X, X_stl);
+    Interface::matrix_from_stl(LU, A_stl);

-    Interface::matrix_from_stl(A,A_stl);
-    Interface::vector_from_stl(B,B_stl);
-    Interface::vector_from_stl(X,X_stl);
-    Interface::matrix_from_stl(LU,A_stl);
-  
    // local variable :

-    typename Interface::Pivot_Vector pivot; // pivot vector
-    Interface::new_Pivot_Vector(pivot,size);
-    
+    typename Interface::Pivot_Vector pivot;  // pivot vector
+    Interface::new_Pivot_Vector(pivot, size);
+
    // timer utilities

    Portable_Timer chronos;
@ -81,56 +73,48 @@ public :
    // time measurement

    chronos.start();
-    
-    for (int ii=0;ii<nb_calc;ii++){

+    for (int ii = 0; ii < nb_calc; ii++) {
      // LU factorization
-      Interface::copy_matrix(A,LU,size);
-      Interface::LU_factor(LU,pivot,size);
-      
+      Interface::copy_matrix(A, LU, size);
+      Interface::LU_factor(LU, pivot, size);
+
      // LU solve

-      Interface::LU_solve(LU,pivot,B,X,size);
-
+      Interface::LU_solve(LU, pivot, B, X, size);
    }

    // Time stop

    chronos.stop();

-    double time=chronos.user_time();
-  
+    double time = chronos.user_time();
+
    // check result :

    typename Interface::stl_vector B_new_stl(size);
-    Interface::vector_to_stl(X,X_stl);
+    Interface::vector_to_stl(X, X_stl);

-    STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl,X_stl,B_new_stl,size); 
-  
-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(B_stl,B_new_stl);
-    
-    if (error>1.e-5){
+    STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl, X_stl, B_new_stl, size);
+
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(B_stl, B_new_stl);
+
+    if (error > 1.e-5) {
      INFOS("WRONG CALCULATION...residual=" << error);
      STL_interface<typename Interface::real_type>::display_vector(B_stl);
      STL_interface<typename Interface::real_type>::display_vector(B_new_stl);
      exit(0);
    }
-    
+
    // deallocation and return time
-    
-    Interface::free_matrix(A,size);
+
+    Interface::free_matrix(A, size);
    Interface::free_vector(B);
    Interface::free_vector(X);
    Interface::free_Pivot_Vector(pivot);

    return time;
  }
-
 };
-  

 #endif
-
-
-
--- a/bench/btl/actions/action_matrix_matrix_product.hh
+++ b/bench/btl/actions/action_matrix_matrix_product.hh
@ -28,103 +28,83 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_matrix_matrix_product {
-
-public :
-
+ public:
  // Ctor

-  Action_matrix_matrix_product( int size ):_size(size)
-  {
+  Action_matrix_matrix_product(int size) : _size(size) {
    MESSAGE("Action_matrix_matrix_product Ctor");

    // STL matrix and vector initialization

-    init_matrix<pseudo_random>(A_stl,_size);
-    init_matrix<pseudo_random>(B_stl,_size);
-    init_matrix<null_function>(X_stl,_size);
-    init_matrix<null_function>(resu_stl,_size);
+    init_matrix<pseudo_random>(A_stl, _size);
+    init_matrix<pseudo_random>(B_stl, _size);
+    init_matrix<null_function>(X_stl, _size);
+    init_matrix<null_function>(resu_stl, _size);

    // generic matrix and vector initialization

-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::matrix_from_stl(B_ref,B_stl);
-    Interface::matrix_from_stl(X_ref,X_stl);
-
-    Interface::matrix_from_stl(A,A_stl);
-    Interface::matrix_from_stl(B,B_stl);
-    Interface::matrix_from_stl(X,X_stl);
+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::matrix_from_stl(B_ref, B_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);

+    Interface::matrix_from_stl(A, A_stl);
+    Interface::matrix_from_stl(B, B_stl);
+    Interface::matrix_from_stl(X, X_stl);
  }

  // invalidate copy ctor

-  Action_matrix_matrix_product( const  Action_matrix_matrix_product & )
-  {
+  Action_matrix_matrix_product(const Action_matrix_matrix_product&) {
    INFOS("illegal call to Action_matrix_matrix_product Copy Ctor");
    exit(0);
  }

  // Dtor

-  ~Action_matrix_matrix_product( void ){
-
+  ~Action_matrix_matrix_product(void) {
    MESSAGE("Action_matrix_matrix_product Dtor");

    // deallocation

-    Interface::free_matrix(A,_size);
-    Interface::free_matrix(B,_size);
-    Interface::free_matrix(X,_size);
-
-    Interface::free_matrix(A_ref,_size);
-    Interface::free_matrix(B_ref,_size);
-    Interface::free_matrix(X_ref,_size);
+    Interface::free_matrix(A, _size);
+    Interface::free_matrix(B, _size);
+    Interface::free_matrix(X, _size);

+    Interface::free_matrix(A_ref, _size);
+    Interface::free_matrix(B_ref, _size);
+    Interface::free_matrix(X_ref, _size);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "matrix_matrix_"+Interface::name();
+  static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); }
+
+  double nb_op_base(void) { return 2.0 * _size * _size * _size; }
+
+  inline void initialize(void) {
+    Interface::copy_matrix(A_ref, A, _size);
+    Interface::copy_matrix(B_ref, B, _size);
+    Interface::copy_matrix(X_ref, X, _size);
  }

-  double nb_op_base( void ){
-    return 2.0*_size*_size*_size;
-  }
-
-  inline void initialize( void ){
-
-    Interface::copy_matrix(A_ref,A,_size);
-    Interface::copy_matrix(B_ref,B,_size);
-    Interface::copy_matrix(X_ref,X,_size);
-
-  }
-
-  inline void calculate( void ) {
-      Interface::matrix_matrix_product(A,B,X,_size);
-  }
-
-  void check_result( void ){
+  inline void calculate(void) { Interface::matrix_matrix_product(A, B, X, _size); }

+  void check_result(void) {
    // calculation check
-    if (_size<200)
-    {
-      Interface::matrix_to_stl(X,resu_stl);
-      STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
-      typename Interface::real_type error=
-        STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
-      if (error>1.e-6){
+    if (_size < 200) {
+      Interface::matrix_to_stl(X, resu_stl);
+      STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl, B_stl, X_stl, _size);
+      typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
+      if (error > 1.e-6) {
        INFOS("WRONG CALCULATION...residual=" << error);
        exit(1);
      }
    }
  }

-private :
-
+ private:
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_matrix B_stl;
  typename Interface::stl_matrix X_stl;
@ -138,13 +118,7 @@ private :
  typename Interface::gene_matrix B;
  typename Interface::gene_matrix X;

-
  int _size;
-
 };

-
 #endif
-
-
-
--- a/bench/btl/actions/action_matrix_matrix_product_bis.hh
+++ b/bench/btl/actions/action_matrix_matrix_product_bis.hh
@ -29,31 +29,23 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_matrix_matrix_product_bis {
+ public:
+  static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); }

-public :
-
-  static inline std::string name( void )
-  {
-    return "matrix_matrix_"+Interface::name();
-  }
-
-  static double nb_op_base(int size){
-    return 2.0*size*size*size;
-  }
-
-  static double calculate( int nb_calc, int size ) {
+  static double nb_op_base(int size) { return 2.0 * size * size * size; }

+  static double calculate(int nb_calc, int size) {
    // STL matrix and vector initialization

    typename Interface::stl_matrix A_stl;
    typename Interface::stl_matrix B_stl;
    typename Interface::stl_matrix X_stl;

-    init_matrix<pseudo_random>(A_stl,size);
-    init_matrix<pseudo_random>(B_stl,size);
-    init_matrix<null_function>(X_stl,size);
+    init_matrix<pseudo_random>(A_stl, size);
+    init_matrix<pseudo_random>(B_stl, size);
+    init_matrix<null_function>(X_stl, size);

    // generic matrix and vector initialization

@ -65,15 +57,13 @@ public :
    typename Interface::gene_matrix B;
    typename Interface::gene_matrix X;

+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::matrix_from_stl(B_ref, B_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);

-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::matrix_from_stl(B_ref,B_stl);
-    Interface::matrix_from_stl(X_ref,X_stl);
-
-    Interface::matrix_from_stl(A,A_stl);
-    Interface::matrix_from_stl(B,B_stl);
-    Interface::matrix_from_stl(X,X_stl);
-
+    Interface::matrix_from_stl(A, A_stl);
+    Interface::matrix_from_stl(B, B_stl);
+    Interface::matrix_from_stl(X, X_stl);

    // STL_timer utilities

@ -84,15 +74,12 @@ public :
    chronos.start_baseline(nb_calc);

    do {
-
-      Interface::copy_matrix(A_ref,A,size);
-      Interface::copy_matrix(B_ref,B,size);
-      Interface::copy_matrix(X_ref,X,size);
-
+      Interface::copy_matrix(A_ref, A, size);
+      Interface::copy_matrix(B_ref, B, size);
+      Interface::copy_matrix(X_ref, X, size);

      //      Interface::matrix_matrix_product(A,B,X,size); This line must be commented !!!!
-    }
-    while(chronos.check());
+    } while (chronos.check());

    chronos.report(true);

@ -101,52 +88,44 @@ public :
    chronos.start(nb_calc);

    do {
+      Interface::copy_matrix(A_ref, A, size);
+      Interface::copy_matrix(B_ref, B, size);
+      Interface::copy_matrix(X_ref, X, size);

-      Interface::copy_matrix(A_ref,A,size);
-      Interface::copy_matrix(B_ref,B,size);
-      Interface::copy_matrix(X_ref,X,size);
-
-      Interface::matrix_matrix_product(A,B,X,size); // here it is not commented !!!!
-    }
-    while(chronos.check());
+      Interface::matrix_matrix_product(A, B, X, size);  // here it is not commented !!!!
+    } while (chronos.check());

    chronos.report(true);

-    double time=chronos.calculated_time/2000.0;
+    double time = chronos.calculated_time / 2000.0;

    // calculation check

    typename Interface::stl_matrix resu_stl(size);

-    Interface::matrix_to_stl(X,resu_stl);
+    Interface::matrix_to_stl(X, resu_stl);

-    STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,size);
+    STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl, B_stl, X_stl, size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);

-    if (error>1.e-6){
+    if (error > 1.e-6) {
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(1);
    }

    // deallocation and return time

-    Interface::free_matrix(A,size);
-    Interface::free_matrix(B,size);
-    Interface::free_matrix(X,size);
+    Interface::free_matrix(A, size);
+    Interface::free_matrix(B, size);
+    Interface::free_matrix(X, size);

-    Interface::free_matrix(A_ref,size);
-    Interface::free_matrix(B_ref,size);
-    Interface::free_matrix(X_ref,size);
+    Interface::free_matrix(A_ref, size);
+    Interface::free_matrix(B_ref, size);
+    Interface::free_matrix(X_ref, size);

    return time;
  }
-
 };

-
 #endif
-
-
-
--- a/bench/btl/actions/action_matrix_vector_product.hh
+++ b/bench/btl/actions/action_matrix_vector_product.hh
@ -28,106 +28,88 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_matrix_vector_product {
-
-public :
-
+ public:
  // Ctor

-  BTL_DONT_INLINE Action_matrix_vector_product( int size ):_size(size)
-  {
+  BTL_DONT_INLINE Action_matrix_vector_product(int size) : _size(size) {
    MESSAGE("Action_matrix_vector_product Ctor");

    // STL matrix and vector initialization

-    init_matrix<pseudo_random>(A_stl,_size);
-    init_vector<pseudo_random>(B_stl,_size);
-    init_vector<null_function>(X_stl,_size);
-    init_vector<null_function>(resu_stl,_size);
+    init_matrix<pseudo_random>(A_stl, _size);
+    init_vector<pseudo_random>(B_stl, _size);
+    init_vector<null_function>(X_stl, _size);
+    init_vector<null_function>(resu_stl, _size);

    // generic matrix and vector initialization

-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::matrix_from_stl(A,A_stl);
-    Interface::vector_from_stl(B_ref,B_stl);
-    Interface::vector_from_stl(B,B_stl);
-    Interface::vector_from_stl(X_ref,X_stl);
-    Interface::vector_from_stl(X,X_stl);
-
+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::matrix_from_stl(A, A_stl);
+    Interface::vector_from_stl(B_ref, B_stl);
+    Interface::vector_from_stl(B, B_stl);
+    Interface::vector_from_stl(X_ref, X_stl);
+    Interface::vector_from_stl(X, X_stl);
  }

  // invalidate copy ctor

-  Action_matrix_vector_product( const  Action_matrix_vector_product & )
-  {
+  Action_matrix_vector_product(const Action_matrix_vector_product&) {
    INFOS("illegal call to Action_matrix_vector_product Copy Ctor");
    exit(1);
  }

  // Dtor

-  BTL_DONT_INLINE ~Action_matrix_vector_product( void ){
-
+  BTL_DONT_INLINE ~Action_matrix_vector_product(void) {
    MESSAGE("Action_matrix_vector_product Dtor");

    // deallocation

-    Interface::free_matrix(A,_size);
+    Interface::free_matrix(A, _size);
    Interface::free_vector(B);
    Interface::free_vector(X);

-    Interface::free_matrix(A_ref,_size);
+    Interface::free_matrix(A_ref, _size);
    Interface::free_vector(B_ref);
    Interface::free_vector(X_ref);
-
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "matrix_vector_" + Interface::name();
+  static inline std::string name(void) { return "matrix_vector_" + Interface::name(); }
+
+  double nb_op_base(void) { return 2.0 * _size * _size; }
+
+  BTL_DONT_INLINE void initialize(void) {
+    Interface::copy_matrix(A_ref, A, _size);
+    Interface::copy_vector(B_ref, B, _size);
+    Interface::copy_vector(X_ref, X, _size);
  }

-  double nb_op_base( void ){
-    return 2.0*_size*_size;
+  BTL_DONT_INLINE void calculate(void) {
+    BTL_ASM_COMMENT("#begin matrix_vector_product");
+    Interface::matrix_vector_product(A, B, X, _size);
+    BTL_ASM_COMMENT("end matrix_vector_product");
  }

-  BTL_DONT_INLINE  void initialize( void ){
-
-    Interface::copy_matrix(A_ref,A,_size);
-    Interface::copy_vector(B_ref,B,_size);
-    Interface::copy_vector(X_ref,X,_size);
-
-  }
-
-  BTL_DONT_INLINE void calculate( void ) {
-      BTL_ASM_COMMENT("#begin matrix_vector_product");
-      Interface::matrix_vector_product(A,B,X,_size);
-      BTL_ASM_COMMENT("end matrix_vector_product");
-  }
-
-  BTL_DONT_INLINE void check_result( void ){
-
+  BTL_DONT_INLINE void check_result(void) {
    // calculation check

-    Interface::vector_to_stl(X,resu_stl);
+    Interface::vector_to_stl(X, resu_stl);

-    STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl,B_stl,X_stl,_size);
+    STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl, B_stl, X_stl, _size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);

-    if (error>1.e-5){
+    if (error > 1.e-5) {
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(0);
    }
-
  }

-private :
-
+ private:
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_vector B_stl;
  typename Interface::stl_vector X_stl;
@ -141,13 +123,7 @@ private :
  typename Interface::gene_vector B;
  typename Interface::gene_vector X;

-
  int _size;
-
 };

-
 #endif
-
-
-
--- a/bench/btl/actions/action_partial_lu.hh
+++ b/bench/btl/actions/action_partial_lu.hh
@ -27,90 +27,73 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_partial_lu {
-
-public :
-
+ public:
  // Ctor

-  Action_partial_lu( int size ):_size(size)
-  {
+  Action_partial_lu(int size) : _size(size) {
    MESSAGE("Action_partial_lu Ctor");

    // STL vector initialization
-    init_matrix<pseudo_random>(X_stl,_size);
-    init_matrix<null_function>(C_stl,_size);
+    init_matrix<pseudo_random>(X_stl, _size);
+    init_matrix<null_function>(C_stl, _size);

    // make sure X is invertible
-    for (int i=0; i<_size; ++i)
-      X_stl[i][i] = X_stl[i][i] * 1e2 + 1;
+    for (int i = 0; i < _size; ++i) X_stl[i][i] = X_stl[i][i] * 1e2 + 1;

    // generic matrix and vector initialization
-    Interface::matrix_from_stl(X_ref,X_stl);
-    Interface::matrix_from_stl(X,X_stl);
-    Interface::matrix_from_stl(C,C_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);
+    Interface::matrix_from_stl(X, X_stl);
+    Interface::matrix_from_stl(C, C_stl);

-    _cost = 2.0*size*size*size/3.0 + size*size;
+    _cost = 2.0 * size * size * size / 3.0 + size * size;
  }

  // invalidate copy ctor

-  Action_partial_lu( const  Action_partial_lu & )
-  {
+  Action_partial_lu(const Action_partial_lu&) {
    INFOS("illegal call to Action_partial_lu Copy Ctor");
    exit(1);
  }

  // Dtor

-  ~Action_partial_lu( void ){
-
+  ~Action_partial_lu(void) {
    MESSAGE("Action_partial_lu Dtor");

    // deallocation
-    Interface::free_matrix(X_ref,_size);
-    Interface::free_matrix(X,_size);
-    Interface::free_matrix(C,_size);
+    Interface::free_matrix(X_ref, _size);
+    Interface::free_matrix(X, _size);
+    Interface::free_matrix(C, _size);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "partial_lu_decomp_"+Interface::name();
-  }
+  static inline std::string name(void) { return "partial_lu_decomp_" + Interface::name(); }

-  double nb_op_base( void ){
-    return _cost;
-  }
+  double nb_op_base(void) { return _cost; }

-  inline void initialize( void ){
-    Interface::copy_matrix(X_ref,X,_size);
-  }
+  inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }

-  inline void calculate( void ) {
-      Interface::partial_lu_decomp(X,C,_size);
-  }
+  inline void calculate(void) { Interface::partial_lu_decomp(X, C, _size); }

-  void check_result( void ){
+  void check_result(void) {
    // calculation check
-//     Interface::matrix_to_stl(C,resu_stl);
-
-//     STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
-//
-//     typename Interface::real_type error=
-//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
-//
-//     if (error>1.e-6){
-//       INFOS("WRONG CALCULATION...residual=" << error);
-//       exit(0);
-//     }
+    //     Interface::matrix_to_stl(C,resu_stl);

+    //     STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
+    //
+    //     typename Interface::real_type error=
+    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
+    //
+    //     if (error>1.e-6){
+    //       INFOS("WRONG CALCULATION...residual=" << error);
+    //       exit(0);
+    //     }
  }

-private :
-
+ private:
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix C_stl;

--- a/bench/btl/actions/action_rot.hh
+++ b/bench/btl/actions/action_rot.hh
@ -23,37 +23,33 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_rot {
-
-public :
-
+ public:
  // Ctor
-  BTL_DONT_INLINE Action_rot( int size ):_size(size)
-  {
+  BTL_DONT_INLINE Action_rot(int size) : _size(size) {
    MESSAGE("Action_rot Ctor");

    // STL matrix and vector initialization
    typename Interface::stl_matrix tmp;
-    init_vector<pseudo_random>(A_stl,_size);
-    init_vector<pseudo_random>(B_stl,_size);
+    init_vector<pseudo_random>(A_stl, _size);
+    init_vector<pseudo_random>(B_stl, _size);

    // generic matrix and vector initialization
-    Interface::vector_from_stl(A_ref,A_stl);
-    Interface::vector_from_stl(A,A_stl);
-    Interface::vector_from_stl(B_ref,B_stl);
-    Interface::vector_from_stl(B,B_stl);
+    Interface::vector_from_stl(A_ref, A_stl);
+    Interface::vector_from_stl(A, A_stl);
+    Interface::vector_from_stl(B_ref, B_stl);
+    Interface::vector_from_stl(B, B_stl);
  }

  // invalidate copy ctor
-  Action_rot( const  Action_rot & )
-  {
+  Action_rot(const Action_rot&) {
    INFOS("illegal call to Action_rot Copy Ctor");
    exit(1);
  }

  // Dtor
-  BTL_DONT_INLINE ~Action_rot( void ){
+  BTL_DONT_INLINE ~Action_rot(void) {
    MESSAGE("Action_rot Dtor");
    Interface::free_vector(A);
    Interface::free_vector(B);
@ -62,44 +58,37 @@ public :
  }

  // action name
-  static inline std::string name( void )
-  {
-    return "rot_" + Interface::name();
+  static inline std::string name(void) { return "rot_" + Interface::name(); }
+
+  double nb_op_base(void) { return 6.0 * _size; }
+
+  BTL_DONT_INLINE void initialize(void) {
+    Interface::copy_vector(A_ref, A, _size);
+    Interface::copy_vector(B_ref, B, _size);
  }

-  double nb_op_base( void ){
-    return 6.0*_size;
-  }
-
-  BTL_DONT_INLINE  void initialize( void ){
-    Interface::copy_vector(A_ref,A,_size);
-    Interface::copy_vector(B_ref,B,_size);
-  }
-
-  BTL_DONT_INLINE void calculate( void ) {
+  BTL_DONT_INLINE void calculate(void) {
    BTL_ASM_COMMENT("#begin rot");
-    Interface::rot(A,B,0.5,0.6,_size);
+    Interface::rot(A, B, 0.5, 0.6, _size);
    BTL_ASM_COMMENT("end rot");
  }

-  BTL_DONT_INLINE void check_result( void ){
+  BTL_DONT_INLINE void check_result(void) {
    // calculation check
-//     Interface::vector_to_stl(X,resu_stl);
+    //     Interface::vector_to_stl(X,resu_stl);

-//     STL_interface<typename Interface::real_type>::rot(A_stl,B_stl,X_stl,_size);
+    //     STL_interface<typename Interface::real_type>::rot(A_stl,B_stl,X_stl,_size);

-//     typename Interface::real_type error=
-//       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
-
-//     if (error>1.e-3){
-//       INFOS("WRONG CALCULATION...residual=" << error);
-//       exit(0);
-//     }
+    //     typename Interface::real_type error=
+    //       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);

+    //     if (error>1.e-3){
+    //       INFOS("WRONG CALCULATION...residual=" << error);
+    //       exit(0);
+    //     }
  }

-private :
-
+ private:
  typename Interface::stl_vector A_stl;
  typename Interface::stl_vector B_stl;

@ -112,5 +101,4 @@ private :
  int _size;
 };

-
 #endif
--- a/bench/btl/actions/action_symv.hh
+++ b/bench/btl/actions/action_symv.hh
@ -28,95 +28,80 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_symv {
-
-public :
-
+ public:
  // Ctor

-  BTL_DONT_INLINE Action_symv( int size ):_size(size)
-  {
+  BTL_DONT_INLINE Action_symv(int size) : _size(size) {
    MESSAGE("Action_symv Ctor");

    // STL matrix and vector initialization
-    init_matrix_symm<pseudo_random>(A_stl,_size);
-    init_vector<pseudo_random>(B_stl,_size);
-    init_vector<null_function>(X_stl,_size);
-    init_vector<null_function>(resu_stl,_size);
+    init_matrix_symm<pseudo_random>(A_stl, _size);
+    init_vector<pseudo_random>(B_stl, _size);
+    init_vector<null_function>(X_stl, _size);
+    init_vector<null_function>(resu_stl, _size);

    // generic matrix and vector initialization
-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::matrix_from_stl(A,A_stl);
-    Interface::vector_from_stl(B_ref,B_stl);
-    Interface::vector_from_stl(B,B_stl);
-    Interface::vector_from_stl(X_ref,X_stl);
-    Interface::vector_from_stl(X,X_stl);
-
+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::matrix_from_stl(A, A_stl);
+    Interface::vector_from_stl(B_ref, B_stl);
+    Interface::vector_from_stl(B, B_stl);
+    Interface::vector_from_stl(X_ref, X_stl);
+    Interface::vector_from_stl(X, X_stl);
  }

  // invalidate copy ctor

-  Action_symv( const  Action_symv & )
-  {
+  Action_symv(const Action_symv&) {
    INFOS("illegal call to Action_symv Copy Ctor");
    exit(1);
  }

  // Dtor
-  BTL_DONT_INLINE ~Action_symv( void ){
-    Interface::free_matrix(A,_size);
+  BTL_DONT_INLINE ~Action_symv(void) {
+    Interface::free_matrix(A, _size);
    Interface::free_vector(B);
    Interface::free_vector(X);
-    Interface::free_matrix(A_ref,_size);
+    Interface::free_matrix(A_ref, _size);
    Interface::free_vector(B_ref);
    Interface::free_vector(X_ref);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "symv_" + Interface::name();
+  static inline std::string name(void) { return "symv_" + Interface::name(); }
+
+  double nb_op_base(void) { return 2.0 * _size * _size; }
+
+  BTL_DONT_INLINE void initialize(void) {
+    Interface::copy_matrix(A_ref, A, _size);
+    Interface::copy_vector(B_ref, B, _size);
+    Interface::copy_vector(X_ref, X, _size);
  }

-  double nb_op_base( void ){
-    return 2.0*_size*_size;
+  BTL_DONT_INLINE void calculate(void) {
+    BTL_ASM_COMMENT("#begin symv");
+    Interface::symv(A, B, X, _size);
+    BTL_ASM_COMMENT("end symv");
  }

-  BTL_DONT_INLINE  void initialize( void ){
-
-    Interface::copy_matrix(A_ref,A,_size);
-    Interface::copy_vector(B_ref,B,_size);
-    Interface::copy_vector(X_ref,X,_size);
-
-  }
-
-  BTL_DONT_INLINE void calculate( void ) {
-      BTL_ASM_COMMENT("#begin symv");
-      Interface::symv(A,B,X,_size);
-      BTL_ASM_COMMENT("end symv");
-  }
-
-  BTL_DONT_INLINE void check_result( void ){
-    if (_size>128) return;
+  BTL_DONT_INLINE void check_result(void) {
+    if (_size > 128) return;
    // calculation check
-    Interface::vector_to_stl(X,resu_stl);
+    Interface::vector_to_stl(X, resu_stl);

-    STL_interface<typename Interface::real_type>::symv(A_stl,B_stl,X_stl,_size);
+    STL_interface<typename Interface::real_type>::symv(A_stl, B_stl, X_stl, _size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);

-    if (error>1.e-5){
+    if (error > 1.e-5) {
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(0);
    }
-
  }

-private :
-
+ private:
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_vector B_stl;
  typename Interface::stl_vector X_stl;
@ -130,10 +115,7 @@ private :
  typename Interface::gene_vector B;
  typename Interface::gene_vector X;

-
  int _size;
-
 };

-
 #endif
--- a/bench/btl/actions/action_syr2.hh
+++ b/bench/btl/actions/action_syr2.hh
@ -28,89 +28,77 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_syr2 {
-
-public :
-
+ public:
  // Ctor

-  BTL_DONT_INLINE Action_syr2( int size ):_size(size)
-  {
+  BTL_DONT_INLINE Action_syr2(int size) : _size(size) {
    // STL matrix and vector initialization
    typename Interface::stl_matrix tmp;
-    init_matrix<pseudo_random>(A_stl,_size);
-    init_vector<pseudo_random>(B_stl,_size);
-    init_vector<pseudo_random>(X_stl,_size);
-    init_vector<null_function>(resu_stl,_size);
+    init_matrix<pseudo_random>(A_stl, _size);
+    init_vector<pseudo_random>(B_stl, _size);
+    init_vector<pseudo_random>(X_stl, _size);
+    init_vector<null_function>(resu_stl, _size);

    // generic matrix and vector initialization
-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::matrix_from_stl(A,A_stl);
-    Interface::vector_from_stl(B_ref,B_stl);
-    Interface::vector_from_stl(B,B_stl);
-    Interface::vector_from_stl(X_ref,X_stl);
-    Interface::vector_from_stl(X,X_stl);
+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::matrix_from_stl(A, A_stl);
+    Interface::vector_from_stl(B_ref, B_stl);
+    Interface::vector_from_stl(B, B_stl);
+    Interface::vector_from_stl(X_ref, X_stl);
+    Interface::vector_from_stl(X, X_stl);
  }

  // invalidate copy ctor
-  Action_syr2( const  Action_syr2 & )
-  {
+  Action_syr2(const Action_syr2&) {
    INFOS("illegal call to Action_syr2 Copy Ctor");
    exit(1);
  }

  // Dtor
-  BTL_DONT_INLINE ~Action_syr2( void ){
-    Interface::free_matrix(A,_size);
+  BTL_DONT_INLINE ~Action_syr2(void) {
+    Interface::free_matrix(A, _size);
    Interface::free_vector(B);
    Interface::free_vector(X);
-    Interface::free_matrix(A_ref,_size);
+    Interface::free_matrix(A_ref, _size);
    Interface::free_vector(B_ref);
    Interface::free_vector(X_ref);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "syr2_" + Interface::name();
+  static inline std::string name(void) { return "syr2_" + Interface::name(); }
+
+  double nb_op_base(void) { return 2.0 * _size * _size; }
+
+  BTL_DONT_INLINE void initialize(void) {
+    Interface::copy_matrix(A_ref, A, _size);
+    Interface::copy_vector(B_ref, B, _size);
+    Interface::copy_vector(X_ref, X, _size);
  }

-  double nb_op_base( void ){
-    return 2.0*_size*_size;
+  BTL_DONT_INLINE void calculate(void) {
+    BTL_ASM_COMMENT("#begin syr2");
+    Interface::syr2(A, B, X, _size);
+    BTL_ASM_COMMENT("end syr2");
  }

-  BTL_DONT_INLINE  void initialize( void ){
-    Interface::copy_matrix(A_ref,A,_size);
-    Interface::copy_vector(B_ref,B,_size);
-    Interface::copy_vector(X_ref,X,_size);
-  }
-
-  BTL_DONT_INLINE void calculate( void ) {
-      BTL_ASM_COMMENT("#begin syr2");
-      Interface::syr2(A,B,X,_size);
-      BTL_ASM_COMMENT("end syr2");
-  }
-
-  BTL_DONT_INLINE void check_result( void ){
+  BTL_DONT_INLINE void check_result(void) {
    // calculation check
-    Interface::vector_to_stl(X,resu_stl);
+    Interface::vector_to_stl(X, resu_stl);

-    STL_interface<typename Interface::real_type>::syr2(A_stl,B_stl,X_stl,_size);
+    STL_interface<typename Interface::real_type>::syr2(A_stl, B_stl, X_stl, _size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);

-    if (error>1.e-3){
+    if (error > 1.e-3) {
      INFOS("WRONG CALCULATION...residual=" << error);
-//       exit(0);
+      //       exit(0);
    }
-
  }

-private :
-
+ private:
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_vector B_stl;
  typename Interface::stl_vector X_stl;
@ -124,10 +112,7 @@ private :
  typename Interface::gene_vector B;
  typename Interface::gene_vector X;

-
  int _size;
-
 };

-
 #endif
--- a/bench/btl/actions/action_trisolve.hh
+++ b/bench/btl/actions/action_trisolve.hh
@ -27,100 +27,82 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_trisolve {
-
-public :
-
+ public:
  // Ctor

-  Action_trisolve( int size ):_size(size)
-  {
+  Action_trisolve(int size) : _size(size) {
    MESSAGE("Action_trisolve Ctor");

    // STL vector initialization
-    init_matrix<pseudo_random>(L_stl,_size);
-    init_vector<pseudo_random>(B_stl,_size);
-    init_vector<null_function>(X_stl,_size);
-    for (int j=0; j<_size; ++j)
-    {
-      for (int i=0; i<j; ++i)
-        L_stl[j][i] = 0;
+    init_matrix<pseudo_random>(L_stl, _size);
+    init_vector<pseudo_random>(B_stl, _size);
+    init_vector<null_function>(X_stl, _size);
+    for (int j = 0; j < _size; ++j) {
+      for (int i = 0; i < j; ++i) L_stl[j][i] = 0;
      L_stl[j][j] += 3;
    }

-    init_vector<null_function>(resu_stl,_size);
+    init_vector<null_function>(resu_stl, _size);

    // generic matrix and vector initialization
-    Interface::matrix_from_stl(L,L_stl);
-    Interface::vector_from_stl(X,X_stl);
-    Interface::vector_from_stl(B,B_stl);
+    Interface::matrix_from_stl(L, L_stl);
+    Interface::vector_from_stl(X, X_stl);
+    Interface::vector_from_stl(B, B_stl);

    _cost = 0;
-    for (int j=0; j<_size; ++j)
-    {
-      _cost += 2*j + 1;
+    for (int j = 0; j < _size; ++j) {
+      _cost += 2 * j + 1;
    }
  }

  // invalidate copy ctor

-  Action_trisolve( const  Action_trisolve & )
-  {
+  Action_trisolve(const Action_trisolve&) {
    INFOS("illegal call to Action_trisolve Copy Ctor");
    exit(1);
  }

  // Dtor

-  ~Action_trisolve( void ){
-
+  ~Action_trisolve(void) {
    MESSAGE("Action_trisolve Dtor");

    // deallocation
-    Interface::free_matrix(L,_size);
+    Interface::free_matrix(L, _size);
    Interface::free_vector(B);
    Interface::free_vector(X);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "trisolve_vector_"+Interface::name();
+  static inline std::string name(void) { return "trisolve_vector_" + Interface::name(); }
+
+  double nb_op_base(void) { return _cost; }
+
+  inline void initialize(void) {
+    // Interface::copy_vector(X_ref,X,_size);
  }

-  double nb_op_base( void ){
-    return _cost;
-  }
+  inline void calculate(void) { Interface::trisolve_lower(L, B, X, _size); }

-  inline void initialize( void ){
-    //Interface::copy_vector(X_ref,X,_size);
-  }
-
-  inline void calculate( void ) {
-      Interface::trisolve_lower(L,B,X,_size);
-  }
-
-  void check_result(){
-    if (_size>128) return;
+  void check_result() {
+    if (_size > 128) return;
    // calculation check
-    Interface::vector_to_stl(X,resu_stl);
+    Interface::vector_to_stl(X, resu_stl);

-    STL_interface<typename Interface::real_type>::trisolve_lower(L_stl,B_stl,X_stl,_size);
+    STL_interface<typename Interface::real_type>::trisolve_lower(L_stl, B_stl, X_stl, _size);

-    typename Interface::real_type error=
-      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);

-    if (error>1.e-4){
+    if (error > 1.e-4) {
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(2);
-    } //else INFOS("CALCULATION OK...residual=" << error);
-
+    }  // else INFOS("CALCULATION OK...residual=" << error);
  }

-private :
-
+ private:
  typename Interface::stl_matrix L_stl;
  typename Interface::stl_vector X_stl;
  typename Interface::stl_vector B_stl;
--- a/bench/btl/actions/action_trisolve_matrix.hh
+++ b/bench/btl/actions/action_trisolve_matrix.hh
@ -28,118 +28,97 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_trisolve_matrix {
-
-public :
-
+ public:
  // Ctor

-  Action_trisolve_matrix( int size ):_size(size)
-  {
+  Action_trisolve_matrix(int size) : _size(size) {
    MESSAGE("Action_trisolve_matrix Ctor");

    // STL matrix and vector initialization

-    init_matrix<pseudo_random>(A_stl,_size);
-    init_matrix<pseudo_random>(B_stl,_size);
-    init_matrix<null_function>(X_stl,_size);
-    init_matrix<null_function>(resu_stl,_size);
+    init_matrix<pseudo_random>(A_stl, _size);
+    init_matrix<pseudo_random>(B_stl, _size);
+    init_matrix<null_function>(X_stl, _size);
+    init_matrix<null_function>(resu_stl, _size);

-    for (int j=0; j<_size; ++j)
-    {
-      for (int i=0; i<j; ++i)
-        A_stl[j][i] = 0;
+    for (int j = 0; j < _size; ++j) {
+      for (int i = 0; i < j; ++i) A_stl[j][i] = 0;
      A_stl[j][j] += 3;
    }

    // generic matrix and vector initialization

-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::matrix_from_stl(B_ref,B_stl);
-    Interface::matrix_from_stl(X_ref,X_stl);
+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::matrix_from_stl(B_ref, B_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);

-    Interface::matrix_from_stl(A,A_stl);
-    Interface::matrix_from_stl(B,B_stl);
-    Interface::matrix_from_stl(X,X_stl);
+    Interface::matrix_from_stl(A, A_stl);
+    Interface::matrix_from_stl(B, B_stl);
+    Interface::matrix_from_stl(X, X_stl);

    _cost = 0;
-    for (int j=0; j<_size; ++j)
-    {
-      _cost += 2*j + 1;
+    for (int j = 0; j < _size; ++j) {
+      _cost += 2 * j + 1;
    }
    _cost *= _size;
  }

  // invalidate copy ctor

-  Action_trisolve_matrix( const  Action_trisolve_matrix & )
-  {
+  Action_trisolve_matrix(const Action_trisolve_matrix&) {
    INFOS("illegal call to Action_trisolve_matrix Copy Ctor");
    exit(0);
  }

  // Dtor

-  ~Action_trisolve_matrix( void ){
-
+  ~Action_trisolve_matrix(void) {
    MESSAGE("Action_trisolve_matrix Dtor");

    // deallocation

-    Interface::free_matrix(A,_size);
-    Interface::free_matrix(B,_size);
-    Interface::free_matrix(X,_size);
-
-    Interface::free_matrix(A_ref,_size);
-    Interface::free_matrix(B_ref,_size);
-    Interface::free_matrix(X_ref,_size);
+    Interface::free_matrix(A, _size);
+    Interface::free_matrix(B, _size);
+    Interface::free_matrix(X, _size);

+    Interface::free_matrix(A_ref, _size);
+    Interface::free_matrix(B_ref, _size);
+    Interface::free_matrix(X_ref, _size);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "trisolve_matrix_"+Interface::name();
+  static inline std::string name(void) { return "trisolve_matrix_" + Interface::name(); }
+
+  double nb_op_base(void) { return _cost; }
+
+  inline void initialize(void) {
+    Interface::copy_matrix(A_ref, A, _size);
+    Interface::copy_matrix(B_ref, B, _size);
+    Interface::copy_matrix(X_ref, X, _size);
  }

-  double nb_op_base( void ){
-    return _cost;
-  }
-
-  inline void initialize( void ){
-
-    Interface::copy_matrix(A_ref,A,_size);
-    Interface::copy_matrix(B_ref,B,_size);
-    Interface::copy_matrix(X_ref,X,_size);
-
-  }
-
-  inline void calculate( void ) {
-      Interface::trisolve_lower_matrix(A,B,X,_size);
-  }
-
-  void check_result( void ){
+  inline void calculate(void) { Interface::trisolve_lower_matrix(A, B, X, _size); }

+  void check_result(void) {
    // calculation check

-//     Interface::matrix_to_stl(X,resu_stl);
-//
-//     STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
-//
-//     typename Interface::real_type error=
-//       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
-//
-//     if (error>1.e-6){
-//       INFOS("WRONG CALCULATION...residual=" << error);
-// //       exit(1);
-//     }
-
+    //     Interface::matrix_to_stl(X,resu_stl);
+    //
+    //     STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
+    //
+    //     typename Interface::real_type error=
+    //       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    //
+    //     if (error>1.e-6){
+    //       INFOS("WRONG CALCULATION...residual=" << error);
+    // //       exit(1);
+    //     }
  }

-private :
-
+ private:
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_matrix B_stl;
  typename Interface::stl_matrix X_stl;
@ -155,11 +134,6 @@ private :

  int _size;
  double _cost;
-
 };

-
 #endif
-
-
-
--- a/bench/btl/actions/action_trmm.hh
+++ b/bench/btl/actions/action_trmm.hh
@ -28,118 +28,97 @@

 using namespace std;

-template<class Interface>
+template <class Interface>
 class Action_trmm {
-
-public :
-
+ public:
  // Ctor

-  Action_trmm( int size ):_size(size)
-  {
+  Action_trmm(int size) : _size(size) {
    MESSAGE("Action_trmm Ctor");

    // STL matrix and vector initialization

-    init_matrix<pseudo_random>(A_stl,_size);
-    init_matrix<pseudo_random>(B_stl,_size);
-    init_matrix<null_function>(X_stl,_size);
-    init_matrix<null_function>(resu_stl,_size);
+    init_matrix<pseudo_random>(A_stl, _size);
+    init_matrix<pseudo_random>(B_stl, _size);
+    init_matrix<null_function>(X_stl, _size);
+    init_matrix<null_function>(resu_stl, _size);

-    for (int j=0; j<_size; ++j)
-    {
-      for (int i=0; i<j; ++i)
-        A_stl[j][i] = 0;
+    for (int j = 0; j < _size; ++j) {
+      for (int i = 0; i < j; ++i) A_stl[j][i] = 0;
      A_stl[j][j] += 3;
    }

    // generic matrix and vector initialization

-    Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::matrix_from_stl(B_ref,B_stl);
-    Interface::matrix_from_stl(X_ref,X_stl);
+    Interface::matrix_from_stl(A_ref, A_stl);
+    Interface::matrix_from_stl(B_ref, B_stl);
+    Interface::matrix_from_stl(X_ref, X_stl);

-    Interface::matrix_from_stl(A,A_stl);
-    Interface::matrix_from_stl(B,B_stl);
-    Interface::matrix_from_stl(X,X_stl);
+    Interface::matrix_from_stl(A, A_stl);
+    Interface::matrix_from_stl(B, B_stl);
+    Interface::matrix_from_stl(X, X_stl);

    _cost = 0;
-    for (int j=0; j<_size; ++j)
-    {
-      _cost += 2*j + 1;
+    for (int j = 0; j < _size; ++j) {
+      _cost += 2 * j + 1;
    }
    _cost *= _size;
  }

  // invalidate copy ctor

-  Action_trmm( const  Action_trmm & )
-  {
+  Action_trmm(const Action_trmm&) {
    INFOS("illegal call to Action_trmm Copy Ctor");
    exit(0);
  }

  // Dtor

-  ~Action_trmm( void ){
-
+  ~Action_trmm(void) {
    MESSAGE("Action_trmm Dtor");

    // deallocation

-    Interface::free_matrix(A,_size);
-    Interface::free_matrix(B,_size);
-    Interface::free_matrix(X,_size);
-
-    Interface::free_matrix(A_ref,_size);
-    Interface::free_matrix(B_ref,_size);
-    Interface::free_matrix(X_ref,_size);
+    Interface::free_matrix(A, _size);
+    Interface::free_matrix(B, _size);
+    Interface::free_matrix(X, _size);

+    Interface::free_matrix(A_ref, _size);
+    Interface::free_matrix(B_ref, _size);
+    Interface::free_matrix(X_ref, _size);
  }

  // action name

-  static inline std::string name( void )
-  {
-    return "trmm_"+Interface::name();
+  static inline std::string name(void) { return "trmm_" + Interface::name(); }
+
+  double nb_op_base(void) { return _cost; }
+
+  inline void initialize(void) {
+    Interface::copy_matrix(A_ref, A, _size);
+    Interface::copy_matrix(B_ref, B, _size);
+    Interface::copy_matrix(X_ref, X, _size);
  }

-  double nb_op_base( void ){
-    return _cost;
-  }
-
-  inline void initialize( void ){
-
-    Interface::copy_matrix(A_ref,A,_size);
-    Interface::copy_matrix(B_ref,B,_size);
-    Interface::copy_matrix(X_ref,X,_size);
-
-  }
-
-  inline void calculate( void ) {
-      Interface::trmm(A,B,X,_size);
-  }
-
-  void check_result( void ){
+  inline void calculate(void) { Interface::trmm(A, B, X, _size); }

+  void check_result(void) {
    // calculation check

-//     Interface::matrix_to_stl(X,resu_stl);
-//
-//     STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
-//
-//     typename Interface::real_type error=
-//       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
-//
-//     if (error>1.e-6){
-//       INFOS("WRONG CALCULATION...residual=" << error);
-// //       exit(1);
-//     }
-
+    //     Interface::matrix_to_stl(X,resu_stl);
+    //
+    //     STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
+    //
+    //     typename Interface::real_type error=
+    //       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+    //
+    //     if (error>1.e-6){
+    //       INFOS("WRONG CALCULATION...residual=" << error);
+    // //       exit(1);
+    //     }
  }

-private :
-
+ private:
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_matrix B_stl;
  typename Interface::stl_matrix X_stl;
@ -155,11 +134,6 @@ private :

  int _size;
  double _cost;
-
 };

-
 #endif
-
-
-
--- a/bench/btl/actions/basic_actions.hh
+++ b/bench/btl/actions/basic_actions.hh
@ -18,4 +18,3 @@
 #include "action_rot.hh"

 // #include "action_lu_solve.hh"
-
--- a/bench/btl/data/mean.cxx
+++ b/bench/btl/data/mean.cxx
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  mean.cxx
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:15 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #include "utilities.h"
 #include <vector>
 #include <string>
@ -28,155 +28,138 @@

 using namespace std;

-double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max);
+double mean_calc(const vector<int> &tab_sizes, const vector<double> &tab_mflops, const int size_min,
+                 const int size_max);

-class Lib_Mean{
-
-public:
-  Lib_Mean( void ):_lib_name(),_mean_in_cache(),_mean_out_of_cache(){
+class Lib_Mean {
+ public:
+  Lib_Mean(void) : _lib_name(), _mean_in_cache(), _mean_out_of_cache() {
    MESSAGE("Lib_mean Default Ctor");
    MESSAGE("!!! should not be used");
    exit(0);
  }
-  Lib_Mean(const string & name, const double & mic, const double & moc):_lib_name(name),_mean_in_cache(mic),_mean_out_of_cache(moc){
+  Lib_Mean(const string &name, const double &mic, const double &moc)
+      : _lib_name(name), _mean_in_cache(mic), _mean_out_of_cache(moc) {
    MESSAGE("Lib_mean Ctor");
  }
-  Lib_Mean(const Lib_Mean & lm):_lib_name(lm._lib_name),_mean_in_cache(lm._mean_in_cache),_mean_out_of_cache(lm._mean_out_of_cache){
+  Lib_Mean(const Lib_Mean &lm)
+      : _lib_name(lm._lib_name), _mean_in_cache(lm._mean_in_cache), _mean_out_of_cache(lm._mean_out_of_cache) {
    MESSAGE("Lib_mean Copy Ctor");
  }
-  ~Lib_Mean( void ){
-    MESSAGE("Lib_mean Dtor");
-  }
-    
+  ~Lib_Mean(void) { MESSAGE("Lib_mean Dtor"); }
+
  double _mean_in_cache;
  double _mean_out_of_cache;
  string _lib_name;

-  bool operator < ( const Lib_Mean &right) const 
-  {
-    //return ( this->_mean_out_of_cache > right._mean_out_of_cache) ;
-    return ( this->_mean_in_cache > right._mean_in_cache) ;
+  bool operator<(const Lib_Mean &right) const {
+    // return ( this->_mean_out_of_cache > right._mean_out_of_cache) ;
+    return (this->_mean_in_cache > right._mean_in_cache);
  }
+};

-}; 
-
-
-int main( int argc , char *argv[] )
-{
-
-  if (argc<6){
+int main(int argc, char *argv[]) {
+  if (argc < 6) {
    INFOS("!!! Error ... usage : main what mic Mic moc Moc filename1 finename2...");
    exit(0);
  }
  INFOS(argc);

-  int min_in_cache=atoi(argv[2]);
-  int max_in_cache=atoi(argv[3]);
-  int min_out_of_cache=atoi(argv[4]);
-  int max_out_of_cache=atoi(argv[5]);
+  int min_in_cache = atoi(argv[2]);
+  int max_in_cache = atoi(argv[3]);
+  int min_out_of_cache = atoi(argv[4]);
+  int max_out_of_cache = atoi(argv[5]);

+  multiset<Lib_Mean> s_lib_mean;

-  multiset<Lib_Mean> s_lib_mean ;
+  for (int i = 6; i < argc; i++) {
+    string filename = argv[i];

-  for (int i=6;i<argc;i++){
-    
-    string filename=argv[i];
-    
    INFOS(filename);

-    double mic=0;
-    double moc=0;
+    double mic = 0;
+    double moc = 0;

    {
-      
      vector<int> tab_sizes;
      vector<double> tab_mflops;

-      read_xy_file(filename,tab_sizes,tab_mflops);
+      read_xy_file(filename, tab_sizes, tab_mflops);

-      mic=mean_calc(tab_sizes,tab_mflops,min_in_cache,max_in_cache);
-      moc=mean_calc(tab_sizes,tab_mflops,min_out_of_cache,max_out_of_cache);
+      mic = mean_calc(tab_sizes, tab_mflops, min_in_cache, max_in_cache);
+      moc = mean_calc(tab_sizes, tab_mflops, min_out_of_cache, max_out_of_cache);

-      Lib_Mean cur_lib_mean(filename,mic,moc);
-      
-      s_lib_mean.insert(cur_lib_mean);	
+      Lib_Mean cur_lib_mean(filename, mic, moc);

-    }   
-           
+      s_lib_mean.insert(cur_lib_mean);
+    }
  }

-
-  cout << "<TABLE BORDER CELLPADDING=2>" << endl ;
-  cout << "  <TR>" << endl ;
-  cout << "    <TH ALIGN=CENTER> " << argv[1] << " </TH>" << endl ;
-  cout << "    <TH ALIGN=CENTER> <a href=""#mean_marker""> in cache <BR> mean perf <BR> Mflops </a></TH>" << endl ;
-  cout << "    <TH ALIGN=CENTER> in cache <BR> % best </TH>" << endl ;
-  cout << "    <TH ALIGN=CENTER> <a href=""#mean_marker""> out of cache <BR> mean perf <BR> Mflops </a></TH>" << endl ;
-  cout << "    <TH ALIGN=CENTER> out of cache <BR> % best </TH>" << endl ;
-  cout << "    <TH ALIGN=CENTER> details </TH>" << endl ;
-  cout << "    <TH ALIGN=CENTER> comments </TH>" << endl ;
-  cout << "  </TR>" << endl ;
+  cout << "<TABLE BORDER CELLPADDING=2>" << endl;
+  cout << "  <TR>" << endl;
+  cout << "    <TH ALIGN=CENTER> " << argv[1] << " </TH>" << endl;
+  cout << "    <TH ALIGN=CENTER> <a href="
+          "#mean_marker"
+          "> in cache <BR> mean perf <BR> Mflops </a></TH>"
+       << endl;
+  cout << "    <TH ALIGN=CENTER> in cache <BR> % best </TH>" << endl;
+  cout << "    <TH ALIGN=CENTER> <a href="
+          "#mean_marker"
+          "> out of cache <BR> mean perf <BR> Mflops </a></TH>"
+       << endl;
+  cout << "    <TH ALIGN=CENTER> out of cache <BR> % best </TH>" << endl;
+  cout << "    <TH ALIGN=CENTER> details </TH>" << endl;
+  cout << "    <TH ALIGN=CENTER> comments </TH>" << endl;
+  cout << "  </TR>" << endl;

  multiset<Lib_Mean>::iterator is = s_lib_mean.begin();
-  Lib_Mean best(*is);  
-  
+  Lib_Mean best(*is);

-  for (is=s_lib_mean.begin(); is!=s_lib_mean.end() ; is++){
-
-    cout << "  <TR>" << endl ;
-    cout << "     <TD> " << is->_lib_name << " </TD>" << endl ;
-    cout << "     <TD> " << is->_mean_in_cache << " </TD>" << endl ;
-    cout << "     <TD> " << 100*(is->_mean_in_cache/best._mean_in_cache) << " </TD>" << endl ;
-    cout << "     <TD> " << is->_mean_out_of_cache << " </TD>" << endl ;
-    cout << "     <TD> " << 100*(is->_mean_out_of_cache/best._mean_out_of_cache) << " </TD>" << endl ;
-    cout << "     <TD> " << 
-      "<a href=\"#"<<is->_lib_name<<"_"<<argv[1]<<"\">snippet</a>/" 
-      "<a href=\"#"<<is->_lib_name<<"_flags\">flags</a>  </TD>" << endl ;
-    cout << "     <TD> " << 
-      "<a href=\"#"<<is->_lib_name<<"_comments\">click here</a>  </TD>" << endl ;
-    cout << "  </TR>" << endl ;
-  
+  for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) {
+    cout << "  <TR>" << endl;
+    cout << "     <TD> " << is->_lib_name << " </TD>" << endl;
+    cout << "     <TD> " << is->_mean_in_cache << " </TD>" << endl;
+    cout << "     <TD> " << 100 * (is->_mean_in_cache / best._mean_in_cache) << " </TD>" << endl;
+    cout << "     <TD> " << is->_mean_out_of_cache << " </TD>" << endl;
+    cout << "     <TD> " << 100 * (is->_mean_out_of_cache / best._mean_out_of_cache) << " </TD>" << endl;
+    cout << "     <TD> "
+         << "<a href=\"#" << is->_lib_name << "_" << argv[1]
+         << "\">snippet</a>/"
+            "<a href=\"#"
+         << is->_lib_name << "_flags\">flags</a>  </TD>" << endl;
+    cout << "     <TD> "
+         << "<a href=\"#" << is->_lib_name << "_comments\">click here</a>  </TD>" << endl;
+    cout << "  </TR>" << endl;
  }

-  cout << "</TABLE>" << endl ;
+  cout << "</TABLE>" << endl;

-  ofstream output_file ("../order_lib",ios::out) ;
-  
-  for (is=s_lib_mean.begin(); is!=s_lib_mean.end() ; is++){
-    output_file << is->_lib_name << endl ;
+  ofstream output_file("../order_lib", ios::out);
+
+  for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) {
+    output_file << is->_lib_name << endl;
  }

  output_file.close();
-
 }

-double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){
-  
-  int size=tab_sizes.size();
-  int nb_sample=0;
-  double mean=0.0;
+double mean_calc(const vector<int> &tab_sizes, const vector<double> &tab_mflops, const int size_min,
+                 const int size_max) {
+  int size = tab_sizes.size();
+  int nb_sample = 0;
+  double mean = 0.0;

-  for (int i=0;i<size;i++){
-    
-    
-    if ((tab_sizes[i]>=size_min)&&(tab_sizes[i]<=size_max)){
-      
+  for (int i = 0; i < size; i++) {
+    if ((tab_sizes[i] >= size_min) && (tab_sizes[i] <= size_max)) {
      nb_sample++;
-      mean+=tab_mflops[i];
-
+      mean += tab_mflops[i];
    }
-
-    
  }

-  if (nb_sample==0){
+  if (nb_sample == 0) {
    INFOS("no data for mean calculation");
    return 0.0;
  }

-  return mean/nb_sample;
+  return mean / nb_sample;
 }
-
-  
-
-
--- a/bench/btl/data/regularize.cxx
+++ b/bench/btl/data/regularize.cxx
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  regularize.cxx
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:15 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #include "utilities.h"
 #include <vector>
 #include <string>
@ -27,99 +27,82 @@

 using namespace std;

-void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
-void regularize_curve(const string & filename,
-		      const vector<double> & tab_mflops, 
-		      const vector<int> & tab_sizes, 
-		      int start_cut_size, int stop_cut_size);
+void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops);
+void regularize_curve(const string &filename, const vector<double> &tab_mflops, const vector<int> &tab_sizes,
+                      int start_cut_size, int stop_cut_size);
 /////////////////////////////////////////////////////////////////////////////////////////////////

-int main( int argc , char *argv[] )
-{
-
+int main(int argc, char *argv[]) {
  // input data

-  if (argc<4){
+  if (argc < 4) {
    INFOS("!!! Error ... usage : main filename start_cut_size stop_cut_size regularize_filename");
    exit(0);
  }
  INFOS(argc);

-  int start_cut_size=atoi(argv[2]);
-  int stop_cut_size=atoi(argv[3]);
+  int start_cut_size = atoi(argv[2]);
+  int stop_cut_size = atoi(argv[3]);
+
+  string filename = argv[1];
+  string regularize_filename = argv[4];

-  string filename=argv[1];
-  string regularize_filename=argv[4];
-  
  INFOS(filename);
-  INFOS("start_cut_size="<<start_cut_size);
+  INFOS("start_cut_size=" << start_cut_size);

  vector<int> tab_sizes;
  vector<double> tab_mflops;

-  read_xy_file(filename,tab_sizes,tab_mflops);
+  read_xy_file(filename, tab_sizes, tab_mflops);

  // regularizeing

-  regularize_curve(regularize_filename,tab_mflops,tab_sizes,start_cut_size,stop_cut_size);
-  
-
+  regularize_curve(regularize_filename, tab_mflops, tab_sizes, start_cut_size, stop_cut_size);
 }

 //////////////////////////////////////////////////////////////////////////////////////

-void regularize_curve(const string & filename,
-		      const vector<double> & tab_mflops, 
-		      const vector<int> & tab_sizes, 
-		      int start_cut_size, int stop_cut_size)
-{
-  int size=tab_mflops.size();
-  ofstream output_file (filename.c_str(),ios::out) ;
+void regularize_curve(const string &filename, const vector<double> &tab_mflops, const vector<int> &tab_sizes,
+                      int start_cut_size, int stop_cut_size) {
+  int size = tab_mflops.size();
+  ofstream output_file(filename.c_str(), ios::out);

-  int i=0;
+  int i = 0;

-  while(tab_sizes[i]<start_cut_size){
-    
-    output_file << tab_sizes[i] << " " <<  tab_mflops[i] << endl ;
+  while (tab_sizes[i] < start_cut_size) {
+    output_file << tab_sizes[i] << " " << tab_mflops[i] << endl;
    i++;
-
-  }
-    
-  output_file << endl ;
-
-  while(tab_sizes[i]<stop_cut_size){
-    
-    i++;
-
  }

-  while(i<size){
-    
-    output_file << tab_sizes[i] << " " <<  tab_mflops[i] << endl ;
-    i++;
+  output_file << endl;

+  while (tab_sizes[i] < stop_cut_size) {
+    i++;
+  }
+
+  while (i < size) {
+    output_file << tab_sizes[i] << " " << tab_mflops[i] << endl;
+    i++;
  }

  output_file.close();
-
 }

 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

-void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
+void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops) {
+  ifstream input_file(filename.c_str(), ios::in);

-  ifstream input_file (filename.c_str(),ios::in) ;
-
-  if (!input_file){
-    INFOS("!!! Error opening "<<filename);
+  if (!input_file) {
+    INFOS("!!! Error opening " << filename);
    exit(0);
  }
-  
-  int nb_point=0;
-  int size=0;
-  double mflops=0;

-  while (input_file >> size >> mflops ){
+  int nb_point = 0;
+  int size = 0;
+  double mflops = 0;
+
+  while (input_file >> size >> mflops) {
    nb_point++;
    tab_sizes.push_back(size);
    tab_mflops.push_back(mflops);
@ -128,4 +111,3 @@ void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<doubl

  input_file.close();
 }
-
--- a/bench/btl/data/smooth.cxx
+++ b/bench/btl/data/smooth.cxx
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  smooth.cxx
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:15 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #include "utilities.h"
 #include <vector>
 #include <deque>
@ -28,165 +28,133 @@

 using namespace std;

-void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
-void write_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
-void smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab_mflops,int window_half_width);
-void centered_smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab_mflops,int window_half_width);
+void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops);
+void write_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops);
+void smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width);
+void centered_smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width);

 /////////////////////////////////////////////////////////////////////////////////////////////////

-int main( int argc , char *argv[] )
-{
-
+int main(int argc, char *argv[]) {
  // input data

-  if (argc<3){
+  if (argc < 3) {
    INFOS("!!! Error ... usage : main filename window_half_width smooth_filename");
    exit(0);
  }
  INFOS(argc);

-  int window_half_width=atoi(argv[2]);
+  int window_half_width = atoi(argv[2]);
+
+  string filename = argv[1];
+  string smooth_filename = argv[3];

-  string filename=argv[1];
-  string smooth_filename=argv[3];
-  
  INFOS(filename);
-  INFOS("window_half_width="<<window_half_width);
+  INFOS("window_half_width=" << window_half_width);

  vector<int> tab_sizes;
  vector<double> tab_mflops;

-  read_xy_file(filename,tab_sizes,tab_mflops);
+  read_xy_file(filename, tab_sizes, tab_mflops);

  // smoothing

  vector<double> smooth_tab_mflops;

-  //smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width);
-  centered_smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width);
+  // smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width);
+  centered_smooth_curve(tab_mflops, smooth_tab_mflops, window_half_width);

  // output result

-  write_xy_file(smooth_filename,tab_sizes,smooth_tab_mflops);
-  
-
+  write_xy_file(smooth_filename, tab_sizes, smooth_tab_mflops);
 }

 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

-template<class VECTOR>
-double weighted_mean(const VECTOR & data)
-{
-
-  double mean=0.0;
-  
-  for (int i=0 ; i<data.size() ; i++){
-
-    mean+=data[i];
+template <class VECTOR>
+double weighted_mean(const VECTOR &data) {
+  double mean = 0.0;

+  for (int i = 0; i < data.size(); i++) {
+    mean += data[i];
  }

-  return mean/double(data.size()) ;
-
-}    
-
-
-
+  return mean / double(data.size());
+}

 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

+void smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width) {
+  int window_width = 2 * window_half_width + 1;

-void smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab_mflops,int window_half_width){
-  
-  int window_width=2*window_half_width+1;
-
-  int size=tab_mflops.size();
+  int size = tab_mflops.size();

  vector<double> sample(window_width);
-  
-  for (int i=0 ; i < size ; i++){
-    
-    for ( int j=0 ; j < window_width ; j++ ){
-      
-      int shifted_index=i+j-window_half_width;
-      if (shifted_index<0) shifted_index=0;
-      if (shifted_index>size-1) shifted_index=size-1;
-      sample[j]=tab_mflops[shifted_index];
-      
+
+  for (int i = 0; i < size; i++) {
+    for (int j = 0; j < window_width; j++) {
+      int shifted_index = i + j - window_half_width;
+      if (shifted_index < 0) shifted_index = 0;
+      if (shifted_index > size - 1) shifted_index = size - 1;
+      sample[j] = tab_mflops[shifted_index];
    }

    smooth_tab_mflops.push_back(weighted_mean(sample));
-
  }
-
 }

-void centered_smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab_mflops,int window_half_width){
-  
-  int max_window_width=2*window_half_width+1;
+void centered_smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width) {
+  int max_window_width = 2 * window_half_width + 1;

-  int size=tab_mflops.size();
-
-  
-  for (int i=0 ; i < size ; i++){
+  int size = tab_mflops.size();

+  for (int i = 0; i < size; i++) {
    deque<double> sample;

-    
    sample.push_back(tab_mflops[i]);

-    for ( int j=1 ; j <= window_half_width ; j++ ){
-      
-      int before=i-j;
-      int after=i+j;
-      
-      if ((before>=0)&&(after<size)) // inside of the vector
-	{ 
-	  sample.push_front(tab_mflops[before]);
-	  sample.push_back(tab_mflops[after]);
-	}
+    for (int j = 1; j <= window_half_width; j++) {
+      int before = i - j;
+      int after = i + j;
+
+      if ((before >= 0) && (after < size))  // inside of the vector
+      {
+        sample.push_front(tab_mflops[before]);
+        sample.push_back(tab_mflops[after]);
+      }
    }
-    
+
    smooth_tab_mflops.push_back(weighted_mean(sample));
-    
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+void write_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops) {
+  ofstream output_file(filename.c_str(), ios::out);
+
+  for (int i = 0; i < tab_sizes.size(); i++) {
+    output_file << tab_sizes[i] << " " << tab_mflops[i] << endl;
  }

-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void write_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
-
-  ofstream output_file (filename.c_str(),ios::out) ;
-  
-  for (int i=0 ; i < tab_sizes.size() ; i++)
-    {
-      output_file << tab_sizes[i] << " " <<  tab_mflops[i] << endl ;
-    }
-  
  output_file.close();
-
 }

-
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

-void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
+void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops) {
+  ifstream input_file(filename.c_str(), ios::in);

-  ifstream input_file (filename.c_str(),ios::in) ;
-
-  if (!input_file){
-    INFOS("!!! Error opening "<<filename);
+  if (!input_file) {
+    INFOS("!!! Error opening " << filename);
    exit(0);
  }
-  
-  int nb_point=0;
-  int size=0;
-  double mflops=0;

-  while (input_file >> size >> mflops ){
+  int nb_point = 0;
+  int size = 0;
+  double mflops = 0;
+
+  while (input_file >> size >> mflops) {
    nb_point++;
    tab_sizes.push_back(size);
    tab_mflops.push_back(mflops);
@ -195,4 +163,3 @@ void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<doubl

  input_file.close();
 }
-
--- a/bench/btl/generic_bench/bench.hh
+++ b/bench/btl/generic_bench/bench.hh
@ -33,19 +33,17 @@
 // #include "timers/x86_perf_analyzer.hh"
 // #include "timers/STL_perf_analyzer.hh"
 #ifdef HAVE_MKL
-extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int);
+extern "C" void cblas_saxpy(const int, const float, const float *, const int, float *, const int);
 #endif
 using namespace std;

-template <template<class> class Perf_Analyzer, class Action>
-BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
-{
-  if (BtlConfig::skipAction(Action::name()))
-    return;
+template <template <class> class Perf_Analyzer, class Action>
+BTL_DONT_INLINE void bench(int size_min, int size_max, int nb_point) {
+  if (BtlConfig::skipAction(Action::name())) return;

-  string filename="bench_"+Action::name()+".dat";
+  string filename = "bench_" + Action::name() + ".dat";

-  INFOS("starting " <<filename);
+  INFOS("starting " << filename);

  // utilities

@ -53,7 +51,7 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
  std::vector<int> tab_sizes(nb_point);

  // matrices and vector size calculations
-  size_lin_log(nb_point,size_min,size_max,tab_sizes);
+  size_lin_log(nb_point, size_min, size_max, tab_sizes);

  std::vector<int> oldSizes;
  std::vector<double> oldFlops;
@ -62,29 +60,26 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )

  // loop on matrix size
  Perf_Analyzer<Action> perf_action;
-  for (int i=nb_point-1;i>=0;i--)
-  {
-    //INFOS("size=" <<tab_sizes[i]<<"   ("<<nb_point-i<<"/"<<nb_point<<")");
-    std::cout << " " << "size = " << tab_sizes[i] << "  " << std::flush;
+  for (int i = nb_point - 1; i >= 0; i--) {
+    // INFOS("size=" <<tab_sizes[i]<<"   ("<<nb_point-i<<"/"<<nb_point<<")");
+    std::cout << " "
+              << "size = " << tab_sizes[i] << "  " << std::flush;

    BTL_DISABLE_SSE_EXCEPTIONS();
-    #ifdef HAVE_MKL
+#ifdef HAVE_MKL
    {
      float dummy;
-      cblas_saxpy(1,0,&dummy,1,&dummy,1);
+      cblas_saxpy(1, 0, &dummy, 1, &dummy, 1);
    }
-    #endif
+#endif

    tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
    std::cout << tab_mflops[i];
-    
-    if (hasOldResults)
-    {
-      while (oldi>=0 && oldSizes[oldi]>tab_sizes[i])
-        --oldi;
-      if (oldi>=0 && oldSizes[oldi]==tab_sizes[i])
-      {
-        if (oldFlops[oldi]<tab_mflops[i])
+
+    if (hasOldResults) {
+      while (oldi >= 0 && oldSizes[oldi] > tab_sizes[i]) --oldi;
+      if (oldi >= 0 && oldSizes[oldi] == tab_sizes[i]) {
+        if (oldFlops[oldi] < tab_mflops[i])
          std::cout << "\t > ";
        else
          std::cout << "\t < ";
@ -92,48 +87,38 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
      }
      --oldi;
    }
-    std::cout << " MFlops    (" << nb_point-i << "/" << nb_point << ")" << std::endl;
+    std::cout << " MFlops    (" << nb_point - i << "/" << nb_point << ")" << std::endl;
  }

-  if (!BtlConfig::Instance.overwriteResults)
-  {
-    if (hasOldResults)
-    {
+  if (!BtlConfig::Instance.overwriteResults) {
+    if (hasOldResults) {
      // merge the two data
      std::vector<int> newSizes;
      std::vector<double> newFlops;
-      unsigned int i=0;
-      unsigned int j=0;
-      while (i<tab_sizes.size() && j<oldSizes.size())
-      {
-        if (tab_sizes[i] == oldSizes[j])
-        {
+      unsigned int i = 0;
+      unsigned int j = 0;
+      while (i < tab_sizes.size() && j < oldSizes.size()) {
+        if (tab_sizes[i] == oldSizes[j]) {
          newSizes.push_back(tab_sizes[i]);
          newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
          ++i;
          ++j;
-        }
-        else if (tab_sizes[i] < oldSizes[j])
-        {
+        } else if (tab_sizes[i] < oldSizes[j]) {
          newSizes.push_back(tab_sizes[i]);
          newFlops.push_back(tab_mflops[i]);
          ++i;
-        }
-        else
-        {
+        } else {
          newSizes.push_back(oldSizes[j]);
          newFlops.push_back(oldFlops[j]);
          ++j;
        }
      }
-      while (i<tab_sizes.size())
-      {
+      while (i < tab_sizes.size()) {
        newSizes.push_back(tab_sizes[i]);
        newFlops.push_back(tab_mflops[i]);
        ++i;
      }
-      while (j<oldSizes.size())
-      {
+      while (j < oldSizes.size()) {
        newSizes.push_back(oldSizes[j]);
        newFlops.push_back(oldFlops[j]);
        ++j;
@ -144,25 +129,21 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
  }

  // dump the result in a file  :
-  dump_xy_file(tab_sizes,tab_mflops,filename);
-
+  dump_xy_file(tab_sizes, tab_mflops, filename);
 }

 // default Perf Analyzer

 template <class Action>
-BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ){
-
+BTL_DONT_INLINE void bench(int size_min, int size_max, int nb_point) {
  // if the rdtsc is not available :
-  bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point);
+  bench<Portable_Perf_Analyzer, Action>(size_min, size_max, nb_point);
  // if the rdtsc is available :
-//    bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-
+  //    bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);

  // Only for small problem size. Otherwise it will be too long
-//   bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-//   bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-
+  //   bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
+  //   bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
 }

 #endif
--- a/bench/btl/generic_bench/btl.hh
+++ b/bench/btl/generic_bench/btl.hh
@ -39,7 +39,7 @@
 #endif

 #if (defined __GNUC__)
-#define BTL_ASM_COMMENT(X)  asm("#" X)
+#define BTL_ASM_COMMENT(X) asm("#" X)
 #else
 #define BTL_ASM_COMMENT(X)
 #endif
@ -47,164 +47,131 @@
 #ifdef __SSE__
 #include "xmmintrin.h"
 // This enables flush to zero (FTZ) and denormals are zero (DAZ) modes:
-#define BTL_DISABLE_SSE_EXCEPTIONS()  { _mm_setcsr(_mm_getcsr() | 0x8040); }
+#define BTL_DISABLE_SSE_EXCEPTIONS() \
+  { _mm_setcsr(_mm_getcsr() | 0x8040); }
 #else
 #define BTL_DISABLE_SSE_EXCEPTIONS()
 #endif

 /** Enhanced std::string
-*/
-class BtlString : public std::string
-{
-public:
-    BtlString() : std::string() {}
-    BtlString(const BtlString& str) : std::string(static_cast<const std::string&>(str)) {}
-    BtlString(const std::string& str) : std::string(str) {}
-    BtlString(const char* str) : std::string(str) {}
+ */
+class BtlString : public std::string {
+ public:
+  BtlString() : std::string() {}
+  BtlString(const BtlString& str) : std::string(static_cast<const std::string&>(str)) {}
+  BtlString(const std::string& str) : std::string(str) {}
+  BtlString(const char* str) : std::string(str) {}

-    operator const char* () const { return c_str(); }
+  operator const char*() const { return c_str(); }

-    void trim( bool left = true, bool right = true )
-    {
-        int lspaces, rspaces, len = length(), i;
-        lspaces = rspaces = 0;
+  void trim(bool left = true, bool right = true) {
+    int lspaces, rspaces, len = length(), i;
+    lspaces = rspaces = 0;

-        if ( left )
-            for (i=0; i<len && (at(i)==' '||at(i)=='\t'||at(i)=='\r'||at(i)=='\n'); ++lspaces,++i);
+    if (left)
+      for (i = 0; i < len && (at(i) == ' ' || at(i) == '\t' || at(i) == '\r' || at(i) == '\n'); ++lspaces, ++i)
+        ;

-        if ( right && lspaces < len )
-            for(i=len-1; i>=0 && (at(i)==' '||at(i)=='\t'||at(i)=='\r'||at(i)=='\n'); rspaces++,i--);
+    if (right && lspaces < len)
+      for (i = len - 1; i >= 0 && (at(i) == ' ' || at(i) == '\t' || at(i) == '\r' || at(i) == '\n'); rspaces++, i--)
+        ;

-        *this = substr(lspaces, len-lspaces-rspaces);
-    }
+    *this = substr(lspaces, len - lspaces - rspaces);
+  }

-    std::vector<BtlString> split( const BtlString& delims = "\t\n ") const
-    {
-        std::vector<BtlString> ret;
-        unsigned int numSplits = 0;
-        size_t start, pos;
-        start = 0;
-        do
-        {
-            pos = find_first_of(delims, start);
-            if (pos == start)
-            {
-                ret.push_back("");
-                start = pos + 1;
-            }
-            else if (pos == npos)
-                ret.push_back( substr(start) );
-            else
-            {
-                ret.push_back( substr(start, pos - start) );
-                start = pos + 1;
-            }
-            //start = find_first_not_of(delims, start);
-            ++numSplits;
-        } while (pos != npos);
-        return ret;
-    }
+  std::vector<BtlString> split(const BtlString& delims = "\t\n ") const {
+    std::vector<BtlString> ret;
+    unsigned int numSplits = 0;
+    size_t start, pos;
+    start = 0;
+    do {
+      pos = find_first_of(delims, start);
+      if (pos == start) {
+        ret.push_back("");
+        start = pos + 1;
+      } else if (pos == npos)
+        ret.push_back(substr(start));
+      else {
+        ret.push_back(substr(start, pos - start));
+        start = pos + 1;
+      }
+      // start = find_first_not_of(delims, start);
+      ++numSplits;
+    } while (pos != npos);
+    return ret;
+  }

-    bool endsWith(const BtlString& str) const
-    {
-        if(str.size()>this->size())
-            return false;
-        return this->substr(this->size()-str.size(),str.size()) == str;
-    }
-    bool contains(const BtlString& str) const
-    {
-        return this->find(str)<this->size();
-    }
-    bool beginsWith(const BtlString& str) const
-    {
-        if(str.size()>this->size())
-            return false;
-        return this->substr(0,str.size()) == str;
-    }
+  bool endsWith(const BtlString& str) const {
+    if (str.size() > this->size()) return false;
+    return this->substr(this->size() - str.size(), str.size()) == str;
+  }
+  bool contains(const BtlString& str) const { return this->find(str) < this->size(); }
+  bool beginsWith(const BtlString& str) const {
+    if (str.size() > this->size()) return false;
+    return this->substr(0, str.size()) == str;
+  }

-    BtlString toLowerCase( void )
-    {
-        std::transform(begin(), end(), begin(), static_cast<int(*)(int)>(::tolower) );
-        return *this;
-    }
-    BtlString toUpperCase( void )
-    {
-        std::transform(begin(), end(), begin(), static_cast<int(*)(int)>(::toupper) );
-        return *this;
-    }
+  BtlString toLowerCase(void) {
+    std::transform(begin(), end(), begin(), static_cast<int (*)(int)>(::tolower));
+    return *this;
+  }
+  BtlString toUpperCase(void) {
+    std::transform(begin(), end(), begin(), static_cast<int (*)(int)>(::toupper));
+    return *this;
+  }

-    /** Case insensitive comparison.
-    */
-    bool isEquiv(const BtlString& str) const
-    {
-        BtlString str0 = *this;
-        str0.toLowerCase();
-        BtlString str1 = str;
-        str1.toLowerCase();
-        return str0 == str1;
-    }
+  /** Case insensitive comparison.
+   */
+  bool isEquiv(const BtlString& str) const {
+    BtlString str0 = *this;
+    str0.toLowerCase();
+    BtlString str1 = str;
+    str1.toLowerCase();
+    return str0 == str1;
+  }

-    /** Decompose the current string as a path and a file.
-        For instance: "dir1/dir2/file.ext" leads to path="dir1/dir2/" and filename="file.ext"
-    */
-    void decomposePathAndFile(BtlString& path, BtlString& filename) const
-    {
-        std::vector<BtlString> elements = this->split("/\\");
-        path = "";
-        filename = elements.back();
-        elements.pop_back();
-        if (this->at(0)=='/')
-            path = "/";
-        for (unsigned int i=0 ; i<elements.size() ; ++i)
-            path += elements[i] + "/";
-    }
+  /** Decompose the current string as a path and a file.
+      For instance: "dir1/dir2/file.ext" leads to path="dir1/dir2/" and filename="file.ext"
+  */
+  void decomposePathAndFile(BtlString& path, BtlString& filename) const {
+    std::vector<BtlString> elements = this->split("/\\");
+    path = "";
+    filename = elements.back();
+    elements.pop_back();
+    if (this->at(0) == '/') path = "/";
+    for (unsigned int i = 0; i < elements.size(); ++i) path += elements[i] + "/";
+  }
 };

-class BtlConfig
-{
-public:
-  BtlConfig()
-    : overwriteResults(false), checkResults(true), realclock(false), tries(DEFAULT_NB_TRIES)
-  {
-    char * _config;
-    _config = getenv ("BTL_CONFIG");
-    if (_config!=NULL)
-    {
+class BtlConfig {
+ public:
+  BtlConfig() : overwriteResults(false), checkResults(true), realclock(false), tries(DEFAULT_NB_TRIES) {
+    char* _config;
+    _config = getenv("BTL_CONFIG");
+    if (_config != NULL) {
      std::vector<BtlString> config = BtlString(_config).split(" \t\n");
-      for (unsigned int i = 0; i<config.size(); i++)
-      {
-        if (config[i].beginsWith("-a"))
-        {
-          if (i+1==config.size())
-          {
+      for (unsigned int i = 0; i < config.size(); i++) {
+        if (config[i].beginsWith("-a")) {
+          if (i + 1 == config.size()) {
            std::cerr << "error processing option: " << config[i] << "\n";
            exit(2);
          }
-          Instance.m_selectedActionNames = config[i+1].split(":");
+          Instance.m_selectedActionNames = config[i + 1].split(":");

          i += 1;
-        }
-        else if (config[i].beginsWith("-t"))
-        {
-          if (i+1==config.size())
-          {
+        } else if (config[i].beginsWith("-t")) {
+          if (i + 1 == config.size()) {
            std::cerr << "error processing option: " << config[i] << "\n";
            exit(2);
          }
-          Instance.tries = atoi(config[i+1].c_str());
+          Instance.tries = atoi(config[i + 1].c_str());

          i += 1;
-        }
-        else if (config[i].beginsWith("--overwrite"))
-        {
+        } else if (config[i].beginsWith("--overwrite")) {
          Instance.overwriteResults = true;
-        }
-        else if (config[i].beginsWith("--nocheck"))
-        {
+        } else if (config[i].beginsWith("--nocheck")) {
          Instance.checkResults = false;
-        }
-        else if (config[i].beginsWith("--real"))
-        {
+        } else if (config[i].beginsWith("--real")) {
          Instance.realclock = true;
        }
      }
@ -213,15 +180,12 @@ public:
    BTL_DISABLE_SSE_EXCEPTIONS();
  }

-  BTL_DONT_INLINE static bool skipAction(const std::string& _name)
-  {
-    if (Instance.m_selectedActionNames.empty())
-      return false;
+  BTL_DONT_INLINE static bool skipAction(const std::string& _name) {
+    if (Instance.m_selectedActionNames.empty()) return false;

    BtlString name(_name);
-    for (unsigned int i=0; i<Instance.m_selectedActionNames.size(); ++i)
-      if (name.contains(Instance.m_selectedActionNames[i]))
-        return false;
+    for (unsigned int i = 0; i < Instance.m_selectedActionNames.size(); ++i)
+      if (name.contains(Instance.m_selectedActionNames[i])) return false;

    return true;
  }
@ -232,11 +196,10 @@ public:
  bool realclock;
  int tries;

-protected:
+ protected:
  std::vector<BtlString> m_selectedActionNames;
 };

-#define BTL_MAIN \
-  BtlConfig BtlConfig::Instance
+#define BTL_MAIN BtlConfig BtlConfig::Instance

-#endif // BTL_HH
+#endif  // BTL_HH
--- a/bench/btl/generic_bench/init/init_function.hh
+++ b/bench/btl/generic_bench/init/init_function.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  init_function.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:18 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,39 +16,20 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef INIT_FUNCTION_HH
 #define INIT_FUNCTION_HH

-double simple_function(int index)
-{
-  return index;
-}
+double simple_function(int index) { return index; }

-double simple_function(int index_i, int index_j)
-{
-  return index_i+index_j;
-}
+double simple_function(int index_i, int index_j) { return index_i + index_j; }

-double pseudo_random(int /*index*/)
-{
-  return std::rand()/double(RAND_MAX);
-}
+double pseudo_random(int /*index*/) { return std::rand() / double(RAND_MAX); }

-double pseudo_random(int /*index_i*/, int /*index_j*/)
-{
-  return std::rand()/double(RAND_MAX);
-}
+double pseudo_random(int /*index_i*/, int /*index_j*/) { return std::rand() / double(RAND_MAX); }

+double null_function(int /*index*/) { return 0.0; }

-double null_function(int /*index*/)
-{
-  return 0.0;
-}
-
-double null_function(int /*index_i*/, int /*index_j*/)
-{
-  return 0.0;
-}
+double null_function(int /*index_i*/, int /*index_j*/) { return 0.0; }

 #endif
--- a/bench/btl/generic_bench/init/init_matrix.hh
+++ b/bench/btl/generic_bench/init/init_matrix.hh
@ -24,38 +24,35 @@
 //            resize() method
 //            [] operator for setting element
 //            value_type defined
-template<double init_function(int,int), class Vector>
-BTL_DONT_INLINE void init_row(Vector & X, int size, int row){
-
+template <double init_function(int, int), class Vector>
+BTL_DONT_INLINE void init_row(Vector& X, int size, int row) {
  X.resize(size);

-  for (unsigned int j=0;j<X.size();j++){
-    X[j]=typename Vector::value_type(init_function(row,j));
+  for (unsigned int j = 0; j < X.size(); j++) {
+    X[j] = typename Vector::value_type(init_function(row, j));
  }
 }

-
 // Matrix is a Vector of Vector
 // The Matrix class must satisfy the following part of STL vector concept :
 //            resize() method
 //            [] operator for setting rows
-template<double init_function(int,int),class Vector>
-BTL_DONT_INLINE void init_matrix(Vector &  A, int size){
+template <double init_function(int, int), class Vector>
+BTL_DONT_INLINE void init_matrix(Vector& A, int size) {
  A.resize(size);
-  for (unsigned int row=0; row<A.size() ; row++){
-    init_row<init_function>(A[row],size,row);
+  for (unsigned int row = 0; row < A.size(); row++) {
+    init_row<init_function>(A[row], size, row);
  }
 }

-template<double init_function(int,int),class Matrix>
-BTL_DONT_INLINE void init_matrix_symm(Matrix&  A, int size){
+template <double init_function(int, int), class Matrix>
+BTL_DONT_INLINE void init_matrix_symm(Matrix& A, int size) {
  A.resize(size);
-  for (unsigned int row=0; row<A.size() ; row++)
-    A[row].resize(size);
-  for (unsigned int row=0; row<A.size() ; row++){
-    A[row][row] = init_function(row,row);
-    for (unsigned int col=0; col<row ; col++){
-      double x = init_function(row,col);
+  for (unsigned int row = 0; row < A.size(); row++) A[row].resize(size);
+  for (unsigned int row = 0; row < A.size(); row++) {
+    A[row][row] = init_function(row, row);
+    for (unsigned int col = 0; col < row; col++) {
+      double x = init_function(row, col);
      A[row][col] = A[col][row] = x;
    }
  }
--- a/bench/btl/generic_bench/init/init_vector.hh
+++ b/bench/btl/generic_bench/init/init_vector.hh
@ -24,13 +24,12 @@
 //            resize() method
 //            [] operator for setting element
 //            value_type defined
-template<double init_function(int), class Vector>
-void init_vector(Vector & X, int size){
-
+template <double init_function(int), class Vector>
+void init_vector(Vector& X, int size) {
  X.resize(size);

-  for (unsigned int i=0;i<X.size();i++){
-    X[i]=typename Vector::value_type(init_function(i));
+  for (unsigned int i = 0; i < X.size(); i++) {
+    X[i] = typename Vector::value_type(init_function(i));
  }
 }

--- a/bench/btl/generic_bench/static/bench_static.hh
+++ b/bench/btl/generic_bench/static/bench_static.hh
@ -32,14 +32,11 @@

 using namespace std;

+template <template <class> class Perf_Analyzer, template <class> class Action, template <class, int> class Interface>
+BTL_DONT_INLINE void bench_static(void) {
+  if (BtlConfig::skipAction(Action<Interface<REAL_TYPE, 10> >::name())) return;

-template <template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface>
-BTL_DONT_INLINE  void bench_static(void)
-{
-  if (BtlConfig::skipAction(Action<Interface<REAL_TYPE,10> >::name()))
-    return;
-
-  string filename = "bench_" + Action<Interface<REAL_TYPE,10> >::name() + ".dat";
+  string filename = "bench_" + Action<Interface<REAL_TYPE, 10> >::name() + ".dat";

  INFOS("starting " << filename);

@ -48,33 +45,17 @@ BTL_DONT_INLINE  void bench_static(void)
  std::vector<double> tab_mflops;
  std::vector<double> tab_sizes;

-  static_size_generator<max_size,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops);
+  static_size_generator<max_size, Perf_Analyzer, Action, Interface>::go(tab_sizes, tab_mflops);

-  dump_xy_file(tab_sizes,tab_mflops,filename);
+  dump_xy_file(tab_sizes, tab_mflops, filename);
 }

 // default Perf Analyzer
-template <template<class> class Action, template<class,int> class Interface>
-BTL_DONT_INLINE  void bench_static(void)
-{
-  bench_static<Portable_Perf_Analyzer,Action,Interface>();
-  //bench_static<Mixed_Perf_Analyzer,Action,Interface>();
-  //bench_static<X86_Perf_Analyzer,Action,Interface>();
+template <template <class> class Action, template <class, int> class Interface>
+BTL_DONT_INLINE void bench_static(void) {
+  bench_static<Portable_Perf_Analyzer, Action, Interface>();
+  // bench_static<Mixed_Perf_Analyzer,Action,Interface>();
+  // bench_static<X86_Perf_Analyzer,Action,Interface>();
 }

 #endif
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
--- a/bench/btl/generic_bench/static/intel_bench_fixed_size.hh
+++ b/bench/btl/generic_bench/static/intel_bench_fixed_size.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  intel_bench_fixed_size.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  mar déc 3 18:59:37 CET 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef _BENCH_FIXED_SIZE_HH_
 #define _BENCH_FIXED_SIZE_HH_

@ -24,43 +24,37 @@
 #include "function_time.hh"

 template <class Action>
-double bench_fixed_size(int size, unsigned long long  & nb_calc,unsigned long long & nb_init)
-{
-  
+double bench_fixed_size(int size, unsigned long long& nb_calc, unsigned long long& nb_init) {
  Action action(size);
-  
-  double time_baseline=time_init(nb_init,action);
+
+  double time_baseline = time_init(nb_init, action);

  while (time_baseline < MIN_TIME) {
-
-    //INFOS("nb_init="<<nb_init);
-    //INFOS("time_baseline="<<time_baseline);
-    nb_init*=2;
-    time_baseline=time_init(nb_init,action);
+    // INFOS("nb_init="<<nb_init);
+    // INFOS("time_baseline="<<time_baseline);
+    nb_init *= 2;
+    time_baseline = time_init(nb_init, action);
  }
-  
-  time_baseline=time_baseline/(double(nb_init));
-  
-  double time_action=time_calculate(nb_calc,action);
-  
+
+  time_baseline = time_baseline / (double(nb_init));
+
+  double time_action = time_calculate(nb_calc, action);
+
  while (time_action < MIN_TIME) {
-    
-    nb_calc*=2;
-    time_action=time_calculate(nb_calc,action);
+    nb_calc *= 2;
+    time_action = time_calculate(nb_calc, action);
  }

-  INFOS("nb_init="<<nb_init);
-  INFOS("nb_calc="<<nb_calc);
-  
-  
-  time_action=time_action/(double(nb_calc));
-  
+  INFOS("nb_init=" << nb_init);
+  INFOS("nb_calc=" << nb_calc);
+
+  time_action = time_action / (double(nb_calc));
+
  action.check_result();
-  
-  time_action=time_action-time_baseline;

-  return action.nb_op_base()/(time_action*1000000.0);
+  time_action = time_action - time_baseline;

+  return action.nb_op_base() / (time_action * 1000000.0);
 }

 #endif
--- a/bench/btl/generic_bench/static/static_size_generator.hh
+++ b/bench/btl/generic_bench/static/static_size_generator.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  static_size_generator.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  mar déc 3 18:59:36 CET 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,42 +16,37 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef _STATIC_SIZE_GENERATOR_HH
 #define _STATIC_SIZE_GENERATOR_HH
 #include <vector>

 using namespace std;

-//recursive generation of statically defined matrix and vector sizes
+// recursive generation of statically defined matrix and vector sizes

-template <int SIZE,template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface> 
-struct static_size_generator{
-  static void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
-  {
+template <int SIZE, template <class> class Perf_Analyzer, template <class> class Action,
+          template <class, int> class Interface>
+struct static_size_generator {
+  static void go(vector<double>& tab_sizes, vector<double>& tab_mflops) {
    tab_sizes.push_back(SIZE);
    std::cout << tab_sizes.back() << " \t" << std::flush;
-    Perf_Analyzer<Action<Interface<REAL_TYPE,SIZE> > > perf_action;
+    Perf_Analyzer<Action<Interface<REAL_TYPE, SIZE> > > perf_action;
    tab_mflops.push_back(perf_action.eval_mflops(SIZE));
    std::cout << tab_mflops.back() << " MFlops" << std::endl;
-    static_size_generator<SIZE-1,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops);
+    static_size_generator<SIZE - 1, Perf_Analyzer, Action, Interface>::go(tab_sizes, tab_mflops);
  };
 };

-//recursion end
+// recursion end

-template <template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface> 
-struct static_size_generator<1,Perf_Analyzer,Action,Interface>{  
-  static  void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
-  {
+template <template <class> class Perf_Analyzer, template <class> class Action, template <class, int> class Interface>
+struct static_size_generator<1, Perf_Analyzer, Action, Interface> {
+  static void go(vector<double>& tab_sizes, vector<double>& tab_mflops) {
    tab_sizes.push_back(1);
-    Perf_Analyzer<Action<Interface<REAL_TYPE,1> > > perf_action;
+    Perf_Analyzer<Action<Interface<REAL_TYPE, 1> > > perf_action;
    tab_mflops.push_back(perf_action.eval_mflops(1));
  };
 };

 #endif
-  
-  
-  
-  
--- a/bench/btl/generic_bench/timers/STL_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/STL_perf_analyzer.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  STL_perf_analyzer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  mar déc 3 18:59:35 CET 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,42 +16,35 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef _STL_PERF_ANALYSER_HH
 #define _STL_PERF_ANALYSER_HH

 #include "STL_timer.hh"
 #include "bench_parameter.hh"

-template<class ACTION>
-class STL_Perf_Analyzer{
-public:  
-  STL_Perf_Analyzer(unsigned long long nb_sample=DEFAULT_NB_SAMPLE):_nb_sample(nb_sample),_chronos()
-  {
+template <class ACTION>
+class STL_Perf_Analyzer {
+ public:
+  STL_Perf_Analyzer(unsigned long long nb_sample = DEFAULT_NB_SAMPLE) : _nb_sample(nb_sample), _chronos() {
    MESSAGE("STL_Perf_Analyzer Ctor");
-  }; 
-  STL_Perf_Analyzer( const STL_Perf_Analyzer & ){
+  };
+  STL_Perf_Analyzer(const STL_Perf_Analyzer&) {
    INFOS("Copy Ctor not implemented");
    exit(0);
  };
-  ~STL_Perf_Analyzer( void ){
-    MESSAGE("STL_Perf_Analyzer Dtor");
-  };
-  
-  
-  inline double eval_mflops(int size)
-  {
+  ~STL_Perf_Analyzer(void) { MESSAGE("STL_Perf_Analyzer Dtor"); };

+  inline double eval_mflops(int size) {
    ACTION action(size);

    _chronos.start_baseline(_nb_sample);
-      
-    do {

+    do {
      action.initialize();
    } while (_chronos.check());

-    double baseline_time=_chronos.get_time();
+    double baseline_time = _chronos.get_time();

    _chronos.start(_nb_sample);
    do {
@ -59,24 +52,19 @@ public:
      action.calculate();
    } while (_chronos.check());

-    double calculate_time=_chronos.get_time();
+    double calculate_time = _chronos.get_time();

-    double corrected_time=calculate_time-baseline_time;
-    
-    //    cout << size <<" "<<baseline_time<<" "<<calculate_time<<" "<<corrected_time<<" "<<action.nb_op_base() << endl;    
-    
-    return action.nb_op_base()/(corrected_time*1000000.0);
-    //return action.nb_op_base()/(calculate_time*1000000.0);
-    
+    double corrected_time = calculate_time - baseline_time;
+
+    //    cout << size <<" "<<baseline_time<<" "<<calculate_time<<" "<<corrected_time<<" "<<action.nb_op_base() << endl;
+
+    return action.nb_op_base() / (corrected_time * 1000000.0);
+    // return action.nb_op_base()/(calculate_time*1000000.0);
  }
-private:

+ private:
  STL_Timer _chronos;
  unsigned long long _nb_sample;
-
-  
 };

-  
-  
 #endif
--- a/bench/btl/generic_bench/timers/STL_timer.hh
+++ b/bench/btl/generic_bench/timers/STL_timer.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  STL_Timer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  mar déc 3 18:59:35 CET 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 // STL Timer Class. Adapted (L.P.) from the timer class by Musser et Al
 // described int the Book : STL Tutorial and reference guide.
 // Define a timer class for analyzing algorithm performance.
@ -28,10 +28,10 @@
 using namespace std;

 class STL_Timer {
-public:
-  STL_Timer(){ baseline = false; };  // Default constructor
+ public:
+  STL_Timer() { baseline = false; };  // Default constructor
  // Start a series of r trials:
-  void start(unsigned int r){
+  void start(unsigned int r) {
    reps = r;
    count = 0;
    iterations.clear();
@ -39,30 +39,28 @@ public:
    initial = time(0);
  };
  // Start a series of r trials to determine baseline time:
-  void start_baseline(unsigned int r)
-  {
+  void start_baseline(unsigned int r) {
    baseline = true;
    start(r);
  }
  // Returns true if the trials have been completed, else false
-  bool check()
-  {
+  bool check() {
    ++count;
    final = time(0);
    if (initial < final) {
-      iterations.push_back(count);  
+      iterations.push_back(count);
      initial = final;
      count = 0;
    }
    return (iterations.size() < reps);
  };
  // Returns the results for external use
-  double get_time( void )
-  {
+  double get_time(void) {
    sort(iterations.begin(), iterations.end());
-    return 1.0/iterations[reps/2];
+    return 1.0 / iterations[reps / 2];
  };
-private:
+
+ private:
  unsigned int reps;  // Number of trials
  // For storing loop iterations of a trial
  vector<long> iterations;
@ -72,7 +70,6 @@ private:
  unsigned long count;
  // true if this is a baseline computation, false otherwise
  bool baseline;
-  // For recording the baseline time 
+  // For recording the baseline time
  double baseline_time;
 };
-
--- a/bench/btl/generic_bench/timers/mixed_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/mixed_perf_analyzer.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  mixed_perf_analyzer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  mar déc 3 18:59:36 CET 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef _MIXED_PERF_ANALYSER_HH
 #define _MIXED_PERF_ANALYSER_HH

@ -25,49 +25,34 @@

 // choose portable perf analyzer for long calculations and x86 analyser for short ones

-
-template<class Action>
-class Mixed_Perf_Analyzer{
-  
-public:  
-  Mixed_Perf_Analyzer( void ):_x86pa(),_ppa(),_use_ppa(true)
-  {
-    MESSAGE("Mixed_Perf_Analyzer Ctor");
-  }; 
-  Mixed_Perf_Analyzer( const Mixed_Perf_Analyzer & ){
+template <class Action>
+class Mixed_Perf_Analyzer {
+ public:
+  Mixed_Perf_Analyzer(void) : _x86pa(), _ppa(), _use_ppa(true) { MESSAGE("Mixed_Perf_Analyzer Ctor"); };
+  Mixed_Perf_Analyzer(const Mixed_Perf_Analyzer&) {
    INFOS("Copy Ctor not implemented");
    exit(0);
  };
-  ~Mixed_Perf_Analyzer( void ){
-    MESSAGE("Mixed_Perf_Analyzer Dtor");
-  };
-    
-  
-  inline double eval_mflops(int size)
-  {
+  ~Mixed_Perf_Analyzer(void) { MESSAGE("Mixed_Perf_Analyzer Dtor"); };

-    double result=0.0;
-    if (_use_ppa){      
-      result=_ppa.eval_mflops(size);
-      if (_ppa.get_nb_calc()>DEFAULT_NB_SAMPLE){_use_ppa=false;}      
-    }
-    else{      
-      result=_x86pa.eval_mflops(size);
+  inline double eval_mflops(int size) {
+    double result = 0.0;
+    if (_use_ppa) {
+      result = _ppa.eval_mflops(size);
+      if (_ppa.get_nb_calc() > DEFAULT_NB_SAMPLE) {
+        _use_ppa = false;
+      }
+    } else {
+      result = _x86pa.eval_mflops(size);
    }

    return result;
  }

-private:
-
+ private:
  Portable_Perf_Analyzer<Action> _ppa;
  X86_Perf_Analyzer<Action> _x86pa;
  bool _use_ppa;
-
 };

 #endif
-
-  
-    
-  
--- a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
@ -25,38 +25,33 @@
 #include "timers/portable_timer.hh"

 template <class Action>
-class Portable_Perf_Analyzer{
-public:
-  Portable_Perf_Analyzer( ):_nb_calc(0), m_time_action(0), _chronos(){
-    MESSAGE("Portable_Perf_Analyzer Ctor");
-  };
-  Portable_Perf_Analyzer( const Portable_Perf_Analyzer & ){
+class Portable_Perf_Analyzer {
+ public:
+  Portable_Perf_Analyzer() : _nb_calc(0), m_time_action(0), _chronos() { MESSAGE("Portable_Perf_Analyzer Ctor"); };
+  Portable_Perf_Analyzer(const Portable_Perf_Analyzer&) {
    INFOS("Copy Ctor not implemented");
    exit(0);
  };
-  ~Portable_Perf_Analyzer(){
-    MESSAGE("Portable_Perf_Analyzer Dtor");
-  };
+  ~Portable_Perf_Analyzer() { MESSAGE("Portable_Perf_Analyzer Dtor"); };

-  BTL_DONT_INLINE double eval_mflops(int size)
-  {
+  BTL_DONT_INLINE double eval_mflops(int size) {
    Action action(size);

-//     action.initialize();
-//     time_action = time_calculate(action);
-    while (m_time_action < MIN_TIME)
-    {
-      if(_nb_calc==0) _nb_calc = 1;
-      else            _nb_calc *= 2;
+    //     action.initialize();
+    //     time_action = time_calculate(action);
+    while (m_time_action < MIN_TIME) {
+      if (_nb_calc == 0)
+        _nb_calc = 1;
+      else
+        _nb_calc *= 2;
      action.initialize();
      m_time_action = time_calculate(action);
    }

    // optimize
-    for (int i=1; i<BtlConfig::Instance.tries; ++i)
-    {
+    for (int i = 1; i < BtlConfig::Instance.tries; ++i) {
      Action _action(size);
-      std::cout << " " << _action.nb_op_base()*_nb_calc/(m_time_action*1e6) << " ";
+      std::cout << " " << _action.nb_op_base() * _nb_calc / (m_time_action * 1e6) << " ";
      _action.initialize();
      m_time_action = std::min(m_time_action, time_calculate(_action));
    }
@ -64,40 +59,31 @@ public:
    double time_action = m_time_action / (double(_nb_calc));

    // check
-    if (BtlConfig::Instance.checkResults && size<128)
-    {
+    if (BtlConfig::Instance.checkResults && size < 128) {
      action.initialize();
      action.calculate();
      action.check_result();
    }
-    return action.nb_op_base()/(time_action*1e6);
+    return action.nb_op_base() / (time_action * 1e6);
  }

-  BTL_DONT_INLINE double time_calculate(Action & action)
-  {
+  BTL_DONT_INLINE double time_calculate(Action& action) {
    // time measurement
    action.calculate();
    _chronos.start();
-    for (unsigned int ii=0;ii<_nb_calc;ii++)
-    {
+    for (unsigned int ii = 0; ii < _nb_calc; ii++) {
      action.calculate();
    }
    _chronos.stop();
    return _chronos.user_time();
  }

-  unsigned long long get_nb_calc()
-  {
-    return _nb_calc;
-  }
+  unsigned long long get_nb_calc() { return _nb_calc; }

-
-private:
+ private:
  unsigned long long _nb_calc;
  double m_time_action;
  Portable_Timer _chronos;
-
 };

-#endif //_PORTABLE_PERF_ANALYZER_HH
-
+#endif  //_PORTABLE_PERF_ANALYZER_HH
--- a/bench/btl/generic_bench/timers/portable_perf_analyzer_old.hh
+++ b/bench/btl/generic_bench/timers/portable_perf_analyzer_old.hh
@ -24,93 +24,74 @@
 #include "timers/portable_timer.hh"

 template <class Action>
-class Portable_Perf_Analyzer{
-public:
-  Portable_Perf_Analyzer( void ):_nb_calc(1),_nb_init(1),_chronos(){
-    MESSAGE("Portable_Perf_Analyzer Ctor");
-  };
-  Portable_Perf_Analyzer( const Portable_Perf_Analyzer & ){
+class Portable_Perf_Analyzer {
+ public:
+  Portable_Perf_Analyzer(void) : _nb_calc(1), _nb_init(1), _chronos() { MESSAGE("Portable_Perf_Analyzer Ctor"); };
+  Portable_Perf_Analyzer(const Portable_Perf_Analyzer&) {
    INFOS("Copy Ctor not implemented");
    exit(0);
  };
-  ~Portable_Perf_Analyzer( void ){
-    MESSAGE("Portable_Perf_Analyzer Dtor");
-  };
-
-
-
-  inline double eval_mflops(int size)
-  {
+  ~Portable_Perf_Analyzer(void) { MESSAGE("Portable_Perf_Analyzer Dtor"); };

+  inline double eval_mflops(int size) {
    Action action(size);

-//     double time_baseline = time_init(action);
-//     while (time_baseline < MIN_TIME_INIT)
-//     {
-//       _nb_init *= 2;
-//       time_baseline = time_init(action);
-//     }
-//
-//     // optimize
-//     for (int i=1; i<NB_TRIES; ++i)
-//       time_baseline = std::min(time_baseline, time_init(action));
-//
-//     time_baseline = time_baseline/(double(_nb_init));
+    //     double time_baseline = time_init(action);
+    //     while (time_baseline < MIN_TIME_INIT)
+    //     {
+    //       _nb_init *= 2;
+    //       time_baseline = time_init(action);
+    //     }
+    //
+    //     // optimize
+    //     for (int i=1; i<NB_TRIES; ++i)
+    //       time_baseline = std::min(time_baseline, time_init(action));
+    //
+    //     time_baseline = time_baseline/(double(_nb_init));

    double time_action = time_calculate(action);
-    while (time_action < MIN_TIME)
-    {
+    while (time_action < MIN_TIME) {
      _nb_calc *= 2;
      time_action = time_calculate(action);
    }

    // optimize
-    for (int i=1; i<NB_TRIES; ++i)
-      time_action = std::min(time_action, time_calculate(action));
+    for (int i = 1; i < NB_TRIES; ++i) time_action = std::min(time_action, time_calculate(action));

-//     INFOS("size="<<size);
-//     INFOS("_nb_init="<<_nb_init);
-//     INFOS("_nb_calc="<<_nb_calc);
+    //     INFOS("size="<<size);
+    //     INFOS("_nb_init="<<_nb_init);
+    //     INFOS("_nb_calc="<<_nb_calc);

    time_action = time_action / (double(_nb_calc));

    action.check_result();

-
    double time_baseline = time_init(action);
-    for (int i=1; i<NB_TRIES; ++i)
-      time_baseline = std::min(time_baseline, time_init(action));
-    time_baseline = time_baseline/(double(_nb_init));
+    for (int i = 1; i < NB_TRIES; ++i) time_baseline = std::min(time_baseline, time_init(action));
+    time_baseline = time_baseline / (double(_nb_init));

-
-
-//     INFOS("time_baseline="<<time_baseline);
-//     INFOS("time_action="<<time_action);
+    //     INFOS("time_baseline="<<time_baseline);
+    //     INFOS("time_action="<<time_action);

    time_action = time_action - time_baseline;

-//     INFOS("time_corrected="<<time_action);
+    //     INFOS("time_corrected="<<time_action);

-    return action.nb_op_base()/(time_action*1000000.0);
+    return action.nb_op_base() / (time_action * 1000000.0);
  }

-  inline double time_init(Action & action)
-  {
+  inline double time_init(Action& action) {
    // time measurement
    _chronos.start();
-    for (int ii=0; ii<_nb_init; ii++)
-      action.initialize();
+    for (int ii = 0; ii < _nb_init; ii++) action.initialize();
    _chronos.stop();
    return _chronos.user_time();
  }

-
-  inline double time_calculate(Action & action)
-  {
+  inline double time_calculate(Action& action) {
    // time measurement
    _chronos.start();
-    for (int ii=0;ii<_nb_calc;ii++)
-    {
+    for (int ii = 0; ii < _nb_calc; ii++) {
      action.initialize();
      action.calculate();
    }
@ -118,17 +99,12 @@ public:
    return _chronos.user_time();
  }

-  unsigned long long get_nb_calc( void )
-  {
-    return _nb_calc;
-  }
+  unsigned long long get_nb_calc(void) { return _nb_calc; }

-
-private:
+ private:
  unsigned long long _nb_calc;
  unsigned long long _nb_init;
  Portable_Timer _chronos;
-
 };

-#endif //_PORTABLE_PERF_ANALYZER_HH
+#endif  //_PORTABLE_PERF_ANALYZER_HH
--- a/bench/btl/generic_bench/timers/portable_timer.hh
+++ b/bench/btl/generic_bench/timers/portable_timer.hh
@ -27,10 +27,8 @@

 #include <time.h>

-
 #define USEC_IN_SEC 1000000

-
 //  timer  -------------------------------------------------------------------//

 //  A timer object measures CPU time.
@ -44,90 +42,66 @@
 #define hr_timer
 #endif*/

- class Portable_Timer
- {
-  public:
-
-   typedef struct {
+class Portable_Timer {
+ public:
+  typedef struct {
    LARGE_INTEGER start;
    LARGE_INTEGER stop;
-   } stopWatch;
+  } stopWatch;

+  Portable_Timer() {
+    startVal.QuadPart = 0;
+    stopVal.QuadPart = 0;
+    QueryPerformanceFrequency(&frequency);
+  }

-   Portable_Timer()
-   {
-	 startVal.QuadPart = 0;
-	 stopVal.QuadPart = 0;
-	 QueryPerformanceFrequency(&frequency);
-   }
+  void start() { QueryPerformanceCounter(&startVal); }

-   void start() { QueryPerformanceCounter(&startVal); }
+  void stop() { QueryPerformanceCounter(&stopVal); }

-   void stop() { QueryPerformanceCounter(&stopVal); }
-
-   double elapsed() {
-	 LARGE_INTEGER time;
-     time.QuadPart = stopVal.QuadPart - startVal.QuadPart;
-     return LIToSecs(time);
-   }
-
-   double user_time() { return elapsed(); }
+  double elapsed() {
+    LARGE_INTEGER time;
+    time.QuadPart = stopVal.QuadPart - startVal.QuadPart;
+    return LIToSecs(time);
+  }

+  double user_time() { return elapsed(); }

 private:
+  double LIToSecs(LARGE_INTEGER& L) { return ((double)L.QuadPart / (double)frequency.QuadPart); }

-   double LIToSecs(LARGE_INTEGER& L) {
-     return ((double)L.QuadPart /(double)frequency.QuadPart) ;
-   }
+  LARGE_INTEGER startVal;
+  LARGE_INTEGER stopVal;
+  LARGE_INTEGER frequency;

-   LARGE_INTEGER startVal;
-   LARGE_INTEGER stopVal;
-   LARGE_INTEGER frequency;
-
-
- }; // Portable_Timer
+};  // Portable_Timer

 #elif defined(__APPLE__)
 #include <CoreServices/CoreServices.h>
 #include <mach/mach_time.h>

-
-class Portable_Timer
-{
+class Portable_Timer {
 public:
+  Portable_Timer() {}

-  Portable_Timer()
-  {
+  void start() {
+    m_start_time = double(mach_absolute_time()) * 1e-9;
+    ;
  }

-  void start()
-  {
-    m_start_time = double(mach_absolute_time())*1e-9;;
-
+  void stop() {
+    m_stop_time = double(mach_absolute_time()) * 1e-9;
+    ;
  }

-  void stop()
-  {
-    m_stop_time = double(mach_absolute_time())*1e-9;;
+  double elapsed() { return user_time(); }

-  }
-
-  double elapsed()
-  {
-    return  user_time();
-  }
-
-  double user_time()
-  {
-    return m_stop_time - m_start_time;
-  }
-
-
-private:
+  double user_time() { return m_stop_time - m_start_time; }

+ private:
  double m_stop_time, m_start_time;

-}; // Portable_Timer (Apple)
+};  // Portable_Timer (Apple)

 #else

@ -136,51 +110,33 @@ private:
 #include <unistd.h>
 #include <sys/times.h>

-class Portable_Timer
-{
+class Portable_Timer {
 public:
+  Portable_Timer() { m_clkid = BtlConfig::Instance.realclock ? CLOCK_REALTIME : CLOCK_PROCESS_CPUTIME_ID; }

-  Portable_Timer()
-  {
-    m_clkid = BtlConfig::Instance.realclock ? CLOCK_REALTIME : CLOCK_PROCESS_CPUTIME_ID;
-  }
+  Portable_Timer(int clkid) : m_clkid(clkid) {}

-  Portable_Timer(int clkid) : m_clkid(clkid)
-  {}
-
-  void start()
-  {
+  void start() {
    timespec ts;
    clock_gettime(m_clkid, &ts);
    m_start_time = double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
-
  }

-  void stop()
-  {
+  void stop() {
    timespec ts;
    clock_gettime(m_clkid, &ts);
    m_stop_time = double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
-
  }

-  double elapsed()
-  {
-    return  user_time();
-  }
+  double elapsed() { return user_time(); }

-  double user_time()
-  {
-    return m_stop_time - m_start_time;
-  }
-
-
-private:
+  double user_time() { return m_stop_time - m_start_time; }

+ private:
  int m_clkid;
  double m_stop_time, m_start_time;

-}; // Portable_Timer (Linux)
+};  // Portable_Timer (Linux)

 #endif

--- a/bench/btl/generic_bench/timers/x86_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/x86_perf_analyzer.hh
@ -23,38 +23,30 @@
 #include "x86_timer.hh"
 #include "bench_parameter.hh"

-template<class ACTION>
-class X86_Perf_Analyzer{
-public:
-  X86_Perf_Analyzer( unsigned long long nb_sample=DEFAULT_NB_SAMPLE):_nb_sample(nb_sample),_chronos()
-  {
+template <class ACTION>
+class X86_Perf_Analyzer {
+ public:
+  X86_Perf_Analyzer(unsigned long long nb_sample = DEFAULT_NB_SAMPLE) : _nb_sample(nb_sample), _chronos() {
    MESSAGE("X86_Perf_Analyzer Ctor");
    _chronos.find_frequency();
  };
-  X86_Perf_Analyzer( const X86_Perf_Analyzer & ){
+  X86_Perf_Analyzer(const X86_Perf_Analyzer&) {
    INFOS("Copy Ctor not implemented");
    exit(0);
  };
-  ~X86_Perf_Analyzer( void ){
-    MESSAGE("X86_Perf_Analyzer Dtor");
-  };
-
-
-  inline double eval_mflops(int size)
-  {
+  ~X86_Perf_Analyzer(void) { MESSAGE("X86_Perf_Analyzer Dtor"); };

+  inline double eval_mflops(int size) {
    ACTION action(size);

-    int nb_loop=5;
-    double calculate_time=0.0;
-    double baseline_time=0.0;
-
-    for (int j=0 ; j < nb_loop ; j++){
+    int nb_loop = 5;
+    double calculate_time = 0.0;
+    double baseline_time = 0.0;

+    for (int j = 0; j < nb_loop; j++) {
      _chronos.clear();

-      for(int i=0 ; i < _nb_sample  ; i++)
-      {
+      for (int i = 0; i < _nb_sample; i++) {
        _chronos.start();
        action.initialize();
        action.calculate();
@ -62,47 +54,38 @@ public:
        _chronos.add_get_click();
      }

-      calculate_time += double(_chronos.get_shortest_clicks())/_chronos.frequency();
+      calculate_time += double(_chronos.get_shortest_clicks()) / _chronos.frequency();

-      if (j==0) action.check_result();
+      if (j == 0) action.check_result();

      _chronos.clear();

-      for(int i=0 ; i < _nb_sample  ; i++)
-      {
+      for (int i = 0; i < _nb_sample; i++) {
        _chronos.start();
        action.initialize();
        _chronos.stop();
        _chronos.add_get_click();
-
      }

-      baseline_time+=double(_chronos.get_shortest_clicks())/_chronos.frequency();
-
+      baseline_time += double(_chronos.get_shortest_clicks()) / _chronos.frequency();
    }

-    double corrected_time = (calculate_time-baseline_time)/double(nb_loop);
+    double corrected_time = (calculate_time - baseline_time) / double(nb_loop);

+    //     INFOS("_nb_sample="<<_nb_sample);
+    //     INFOS("baseline_time="<<baseline_time);
+    //     INFOS("calculate_time="<<calculate_time);
+    //     INFOS("corrected_time="<<corrected_time);

-//     INFOS("_nb_sample="<<_nb_sample);
-//     INFOS("baseline_time="<<baseline_time);
-//     INFOS("calculate_time="<<calculate_time);
-//     INFOS("corrected_time="<<corrected_time);
+    //    cout << size <<" "<<baseline_time<<" "<<calculate_time<<" "<<corrected_time<<" "<<action.nb_op_base() << endl;

-//    cout << size <<" "<<baseline_time<<" "<<calculate_time<<" "<<corrected_time<<" "<<action.nb_op_base() << endl;
-
-    return action.nb_op_base()/(corrected_time*1000000.0);
-    //return action.nb_op_base()/(calculate_time*1000000.0);
+    return action.nb_op_base() / (corrected_time * 1000000.0);
+    // return action.nb_op_base()/(calculate_time*1000000.0);
  }

-private:
-
+ private:
  X86_Timer _chronos;
  unsigned long long _nb_sample;
-
-
 };

-
-
 #endif
--- a/bench/btl/generic_bench/timers/x86_timer.hh
+++ b/bench/btl/generic_bench/timers/x86_timer.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  x86_timer.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  mar d<>c 3 18:59:35 CET 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef _X86_TIMER_HH
 #define _X86_TIMER_HH

@ -24,7 +24,7 @@
 #include <sys/resource.h>
 #include <unistd.h>
 #include <sys/times.h>
-//#include "system_time.h"
+// #include "system_time.h"
 #define u32 unsigned int
 #include <asm/msr.h>
 #include "utilities.h"
@ -34,213 +34,143 @@
 #include <iostream>

 // frequence de la becanne en Hz
-//#define FREQUENCY 648000000
-//#define FREQUENCY 1400000000
+// #define FREQUENCY 648000000
+// #define FREQUENCY 1400000000
 #define FREQUENCY 1695000000

 using namespace std;

-
 class X86_Timer {
+ public:
+  X86_Timer(void) : _frequency(FREQUENCY), _nb_sample(0) { MESSAGE("X86_Timer Default Ctor"); }

-public :
+  inline void start(void) { rdtsc(_click_start.n32[0], _click_start.n32[1]); }

-  X86_Timer( void ):_frequency(FREQUENCY),_nb_sample(0)
-  {
-    MESSAGE("X86_Timer Default Ctor");    
-  }
+  inline void stop(void) { rdtsc(_click_stop.n32[0], _click_stop.n32[1]); }

-  inline void start( void ){
+  inline double frequency(void) { return _frequency; }

-    rdtsc(_click_start.n32[0],_click_start.n32[1]);
+  double get_elapsed_time_in_second(void) { return (_click_stop.n64 - _click_start.n64) / double(FREQUENCY); }

-  }
-
-
-  inline void stop( void ){
-
-    rdtsc(_click_stop.n32[0],_click_stop.n32[1]);
-
-  }
-  
-
-  inline double frequency( void ){
-    return _frequency;
-  }
-
-  double get_elapsed_time_in_second( void ){
-
-    return (_click_stop.n64-_click_start.n64)/double(FREQUENCY);
-
-
-  }    
-
-  unsigned long long  get_click( void ){
-    
-    return (_click_stop.n64-_click_start.n64);
-
-  }    
-
-  inline void find_frequency( void ){
+  unsigned long long get_click(void) { return (_click_stop.n64 - _click_start.n64); }

+  inline void find_frequency(void) {
    time_t initial, final;
-    int dummy=2;
+    int dummy = 2;

    initial = time(0);
    start();
    do {
-      dummy+=2;
-    }
-    while(time(0)==initial);
+      dummy += 2;
+    } while (time(0) == initial);
    // On est au debut d'un cycle d'une seconde !!!
    initial = time(0);
    start();
    do {
-      dummy+=2;
-    }
-    while(time(0)==initial);
-    final=time(0);
+      dummy += 2;
+    } while (time(0) == initial);
+    final = time(0);
    stop();
    //    INFOS("fine grained time : "<<  get_elapsed_time_in_second());
    //  INFOS("coarse grained time : "<<  final-initial);
-    _frequency=_frequency*get_elapsed_time_in_second()/double(final-initial);
-    ///  INFOS("CPU frequency : "<<  _frequency);        
-
+    _frequency = _frequency * get_elapsed_time_in_second() / double(final - initial);
+    ///  INFOS("CPU frequency : "<<  _frequency);
  }

-  void  add_get_click( void ){
-       
+  void add_get_click(void) {
    _nb_sample++;
    _counted_clicks[get_click()]++;
    fill_history_clicks();
+  }

-  }    
+  void dump_statistics(string filemane) {
+    ofstream outfile(filemane.c_str(), ios::out);

-  void dump_statistics(string filemane){
-    
-    ofstream outfile (filemane.c_str(),ios::out) ;
+    std::map<unsigned long long, unsigned long long>::iterator itr;
+    for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
+      outfile << (*itr).first << "  " << (*itr).second << endl;
+    }

-    std::map<unsigned long long , unsigned long long>::iterator itr;
-    for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end()  ; itr++)
-      {      
-      outfile  << (*itr).first << "  " << (*itr).second << endl ;       
-      }      
-    
    outfile.close();
-
  }

-  void dump_history(string filemane){
-    
-    ofstream outfile (filemane.c_str(),ios::out) ;
+  void dump_history(string filemane) {
+    ofstream outfile(filemane.c_str(), ios::out);

+    for (int i = 0; i < _history_mean_clicks.size(); i++) {
+      outfile << i << " " << _history_mean_clicks[i] << " " << _history_shortest_clicks[i] << " "
+              << _history_most_occured_clicks[i] << endl;
+    }

-
-    for(int i=0 ; i<_history_mean_clicks.size() ; i++)
-      {      
-	outfile  << i << " " 
-		 << _history_mean_clicks[i] << " " 
-		 << _history_shortest_clicks[i] << " " 
-		 << _history_most_occured_clicks[i] << endl ;
-      }      
-    
    outfile.close();
-
-  }
-     
-
-
-  double get_mean_clicks( void ){
-    
-    std::map<unsigned long long,unsigned long long>::iterator itr;
-    
-    unsigned long long mean_clicks=0;
-
-    for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end()  ; itr++)
-      {      
-	
-	mean_clicks+=(*itr).second*(*itr).first;
-      }      
-
-    return mean_clicks/double(_nb_sample);
-
  }

-  double get_shortest_clicks( void ){
-    
-    return double((*_counted_clicks.begin()).first);
+  double get_mean_clicks(void) {
+    std::map<unsigned long long, unsigned long long>::iterator itr;

+    unsigned long long mean_clicks = 0;
+
+    for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
+      mean_clicks += (*itr).second * (*itr).first;
+    }
+
+    return mean_clicks / double(_nb_sample);
  }

-  void fill_history_clicks( void ){
+  double get_shortest_clicks(void) { return double((*_counted_clicks.begin()).first); }

+  void fill_history_clicks(void) {
    _history_mean_clicks.push_back(get_mean_clicks());
    _history_shortest_clicks.push_back(get_shortest_clicks());
    _history_most_occured_clicks.push_back(get_most_occured_clicks());
-
  }

+  double get_most_occured_clicks(void) {
+    unsigned long long moc = 0;
+    unsigned long long max_occurence = 0;

-  double get_most_occured_clicks( void ){
+    std::map<unsigned long long, unsigned long long>::iterator itr;

-    unsigned long long moc=0;
-    unsigned long long max_occurence=0;
-
-    std::map<unsigned long long,unsigned long long>::iterator itr;
-
-    for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end()  ; itr++)
-      {      
-	
-	if (max_occurence<=(*itr).second){
-	  max_occurence=(*itr).second;
-	  moc=(*itr).first;
-	}
-      }      
-    
-    return double(moc);    
+    for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
+      if (max_occurence <= (*itr).second) {
+        max_occurence = (*itr).second;
+        moc = (*itr).first;
+      }
+    }

+    return double(moc);
  }
-  
-  void clear( void )
-  {
+
+  void clear(void) {
    _counted_clicks.clear();

    _history_mean_clicks.clear();
    _history_shortest_clicks.clear();
    _history_most_occured_clicks.clear();

-    _nb_sample=0;
+    _nb_sample = 0;
  }

-
-    
-private :
-  
-  union
-  {
-    unsigned long int n32[2] ;
-    unsigned long long n64 ;
+ private:
+  union {
+    unsigned long int n32[2];
+    unsigned long long n64;
  } _click_start;

-  union
-  {
-    unsigned long int n32[2] ;
-    unsigned long long n64 ;
+  union {
+    unsigned long int n32[2];
+    unsigned long long n64;
  } _click_stop;

-  double _frequency ;
+  double _frequency;

-  map<unsigned long long,unsigned long long> _counted_clicks;
+  map<unsigned long long, unsigned long long> _counted_clicks;

  vector<double> _history_mean_clicks;
  vector<double> _history_shortest_clicks;
  vector<double> _history_most_occured_clicks;

  unsigned long long _nb_sample;
-
-  
-
 };

-
 #endif
--- a/bench/btl/generic_bench/utils/size_lin_log.hh
+++ b/bench/btl/generic_bench/utils/size_lin_log.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  size_lin_log.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  mar déc 3 18:59:37 CET 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,55 +16,41 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef SIZE_LIN_LOG
 #define SIZE_LIN_LOG

 #include "size_log.hh"

-template<class Vector>
-void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max, Vector & X)
-{
-  int ten=10;
-  int nine=9;
+template <class Vector>
+void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max, Vector& X) {
+  int ten = 10;
+  int nine = 9;

  X.resize(nb_point);

-  if (nb_point>ten){
-
-    for (int i=0;i<nine;i++){
-      
-      X[i]=i+1;
-
+  if (nb_point > ten) {
+    for (int i = 0; i < nine; i++) {
+      X[i] = i + 1;
    }

    Vector log_size;
-    size_log(nb_point-nine,ten,size_max,log_size);
-
-    for (int i=0;i<nb_point-nine;i++){
-      
-      X[i+nine]=log_size[i];
+    size_log(nb_point - nine, ten, size_max, log_size);

+    for (int i = 0; i < nb_point - nine; i++) {
+      X[i + nine] = log_size[i];
    }
-  }
-  else{
-
-    for (int i=0;i<nb_point;i++){
-      
-      X[i]=i+1;
-
+  } else {
+    for (int i = 0; i < nb_point; i++) {
+      X[i] = i + 1;
    }
  }

- //  for (int i=0;i<nb_point;i++){
-    
-//        INFOS("computed sizes : X["<<i<<"]="<<X[i]);
-    
-//   }
+  //  for (int i=0;i<nb_point;i++){

+  //        INFOS("computed sizes : X["<<i<<"]="<<X[i]);
+
+  //   }
 }
-  
+
 #endif
-    
-
-
--- a/bench/btl/generic_bench/utils/size_log.hh
+++ b/bench/btl/generic_bench/utils/size_log.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  size_log.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:17 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef SIZE_LOG
 #define SIZE_LOG

@ -25,30 +25,26 @@
 //            resize() method
 //            [] operator for setting element
 // the vector element are int compatible.
-template<class Vector>
-void size_log(const int nb_point, const int size_min, const int size_max, Vector & X)
-{
+template <class Vector>
+void size_log(const int nb_point, const int size_min, const int size_max, Vector& X) {
  X.resize(nb_point);

-  float ls_min=log(float(size_min));
-  float ls_max=log(float(size_max));
+  float ls_min = log(float(size_min));
+  float ls_max = log(float(size_max));

-  float ls=0.0;
+  float ls = 0.0;

-  float delta_ls=(ls_max-ls_min)/(float(nb_point-1));
+  float delta_ls = (ls_max - ls_min) / (float(nb_point - 1));

-  int size=0;
+  int size = 0;

-  for (int i=0;i<nb_point;i++){
+  for (int i = 0; i < nb_point; i++) {
+    ls = ls_min + float(i) * delta_ls;

-    ls = ls_min + float(i)*delta_ls ;
-    
-    size=int(exp(ls)); 
+    size = int(exp(ls));

-    X[i]=size;
+    X[i] = size;
  }
-
 }

-
 #endif
--- a/bench/btl/generic_bench/utils/utilities.h
+++ b/bench/btl/generic_bench/utils/utilities.h
@ -9,82 +9,120 @@

 /* ---  Definition macros file to print information if _DEBUG_ is defined --- */

-# ifndef UTILITIES_H
-# define UTILITIES_H
+#ifndef UTILITIES_H
+#define UTILITIES_H

-# include <stdlib.h>
-//# include <iostream> ok for gcc3.01
-# include <iostream>
+#include <stdlib.h>
+// # include <iostream> ok for gcc3.01
+#include <iostream>

 /* ---  INFOS is always defined (without _DEBUG_): to be used for warnings, with release version --- */

-# define HEREWEARE cout<<flush ; cerr << __FILE__ << " [" << __LINE__ << "] : " << flush ;
-# define INFOS(chain) {HEREWEARE ; cerr << chain << endl ;}
-# define PYSCRIPT(chain) {cout<<flush ; cerr << "---PYSCRIPT--- " << chain << endl ;}
+#define HEREWEARE \
+  cout << flush;  \
+  cerr << __FILE__ << " [" << __LINE__ << "] : " << flush;
+#define INFOS(chain)       \
+  {                        \
+    HEREWEARE;             \
+    cerr << chain << endl; \
+  }
+#define PYSCRIPT(chain)                         \
+  {                                             \
+    cout << flush;                              \
+    cerr << "---PYSCRIPT--- " << chain << endl; \
+  }

 /* --- To print date and time of compilation of current source on stdout --- */

-# if defined ( __GNUC__ )
-# define COMPILER		"g++" ;
-# elif defined ( __sun )
-# define COMPILER		"CC" ;
-# elif defined ( __KCC )
-# define COMPILER		"KCC" ;
-# elif defined ( __PGI )
-# define COMPILER		"pgCC" ;
-# else
-# define COMPILER		"undefined" ;
-# endif
+#if defined(__GNUC__)
+#define COMPILER "g++";
+#elif defined(__sun)
+#define COMPILER "CC";
+#elif defined(__KCC)
+#define COMPILER "KCC";
+#elif defined(__PGI)
+#define COMPILER "pgCC";
+#else
+#define COMPILER "undefined";
+#endif

-# ifdef INFOS_COMPILATION
-# error INFOS_COMPILATION already defined
-# endif
-# define INFOS_COMPILATION	{\
-					cerr << flush;\
-					cout << __FILE__ ;\
-					cout << " [" << __LINE__ << "] : " ;\
-					cout << "COMPILED with " << COMPILER ;\
-					cout << ", " << __DATE__ ; \
-					cout << " at " << __TIME__ << endl ;\
-					cout << "\n\n" ;\
-					cout << flush ;\
-				}
+#ifdef INFOS_COMPILATION
+#error INFOS_COMPILATION already defined
+#endif
+#define INFOS_COMPILATION                 \
+  {                                       \
+    cerr << flush;                        \
+    cout << __FILE__;                     \
+    cout << " [" << __LINE__ << "] : ";   \
+    cout << "COMPILED with " << COMPILER; \
+    cout << ", " << __DATE__;             \
+    cout << " at " << __TIME__ << endl;   \
+    cout << "\n\n";                       \
+    cout << flush;                        \
+  }

-# ifdef _DEBUG_
+#ifdef _DEBUG_

 /* --- the following MACROS are useful at debug time --- */

-# define HERE cout<<flush ; cerr << "- Trace " << __FILE__ << " [" << __LINE__ << "] : " << flush ;
-# define SCRUTE(var) HERE ; cerr << #var << "=" << var << endl ;
-# define MESSAGE(chain) {HERE ; cerr << chain << endl ;}
-# define INTERRUPTION(code) HERE ; cerr << "INTERRUPTION return code= " << code << endl ; exit(code) ;
+#define HERE     \
+  cout << flush; \
+  cerr << "- Trace " << __FILE__ << " [" << __LINE__ << "] : " << flush;
+#define SCRUTE(var) \
+  HERE;             \
+  cerr << #var << "=" << var << endl;
+#define MESSAGE(chain)     \
+  {                        \
+    HERE;                  \
+    cerr << chain << endl; \
+  }
+#define INTERRUPTION(code)                              \
+  HERE;                                                 \
+  cerr << "INTERRUPTION return code= " << code << endl; \
+  exit(code);

-# ifndef ASSERT
-# define ASSERT(condition) if (!(condition)){ HERE ; cerr << "CONDITION " << #condition << " NOT VERIFIED"<< endl ; INTERRUPTION(1) ;}
-# endif /* ASSERT */
+#ifndef ASSERT
+#define ASSERT(condition)                                          \
+  if (!(condition)) {                                              \
+    HERE;                                                          \
+    cerr << "CONDITION " << #condition << " NOT VERIFIED" << endl; \
+    INTERRUPTION(1);                                               \
+  }
+#endif /* ASSERT */

-#define REPERE cout<<flush ; cerr << "   --------------" << endl << flush ;
-#define BEGIN_OF(chain) {REPERE ; HERE ; cerr << "Begin of: " << chain << endl ; REPERE ; }
-#define END_OF(chain) {REPERE ; HERE ; cerr << "Normal end of: " << chain << endl ; REPERE ; }
+#define REPERE   \
+  cout << flush; \
+  cerr << "   --------------" << endl << flush;
+#define BEGIN_OF(chain)                    \
+  {                                        \
+    REPERE;                                \
+    HERE;                                  \
+    cerr << "Begin of: " << chain << endl; \
+    REPERE;                                \
+  }
+#define END_OF(chain)                           \
+  {                                             \
+    REPERE;                                     \
+    HERE;                                       \
+    cerr << "Normal end of: " << chain << endl; \
+    REPERE;                                     \
+  }

+#else /* ifdef _DEBUG_*/

+#define HERE
+#define SCRUTE(var)
+#define MESSAGE(chain)
+#define INTERRUPTION(code)

-# else /* ifdef _DEBUG_*/
-
-# define HERE
-# define SCRUTE(var)
-# define MESSAGE(chain)
-# define INTERRUPTION(code)
-
-# ifndef ASSERT
-# define ASSERT(condition)
-# endif /* ASSERT */
+#ifndef ASSERT
+#define ASSERT(condition)
+#endif /* ASSERT */

 #define REPERE
 #define BEGIN_OF(chain)
 #define END_OF(chain)

+#endif /* ifdef _DEBUG_*/

-# endif /* ifdef _DEBUG_*/
-
-# endif /* ifndef UTILITIES_H */
+#endif /* ifndef UTILITIES_H */
--- a/bench/btl/generic_bench/utils/xy_file.hh
+++ b/bench/btl/generic_bench/utils/xy_file.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  dump_file_x_y.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:20 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef XY_FILE_HH
 #define XY_FILE_HH
 #include <fstream>
@ -25,24 +25,22 @@
 #include <vector>
 using namespace std;

-bool read_xy_file(const std::string & filename, std::vector<int> & tab_sizes,
-                  std::vector<double> & tab_mflops, bool quiet = false)
-{
+bool read_xy_file(const std::string& filename, std::vector<int>& tab_sizes, std::vector<double>& tab_mflops,
+                  bool quiet = false) {
+  std::ifstream input_file(filename.c_str(), std::ios::in);

-  std::ifstream input_file (filename.c_str(),std::ios::in);
-
-  if (!input_file){
+  if (!input_file) {
    if (!quiet) {
-      INFOS("!!! Error opening "<<filename);
+      INFOS("!!! Error opening " << filename);
    }
    return false;
  }

-  int nb_point=0;
-  int size=0;
-  double mflops=0;
+  int nb_point = 0;
+  int size = 0;
+  double mflops = 0;

-  while (input_file >> size >> mflops ){
+  while (input_file >> size >> mflops) {
    nb_point++;
    tab_sizes.push_back(size);
    tab_mflops.push_back(mflops);
@ -60,16 +58,14 @@ bool read_xy_file(const std::string & filename, std::vector<int> & tab_sizes,

 using namespace std;

-template<class Vector_A, class Vector_B>
-void dump_xy_file(const Vector_A & X, const Vector_B & Y, const std::string & filename){
-  
-  ofstream outfile (filename.c_str(),ios::out) ;
-  int size=X.size();
-  
-  for (int i=0;i<size;i++)
-    outfile << X[i] << " " << Y[i] << endl;
+template <class Vector_A, class Vector_B>
+void dump_xy_file(const Vector_A& X, const Vector_B& Y, const std::string& filename) {
+  ofstream outfile(filename.c_str(), ios::out);
+  int size = X.size();
+
+  for (int i = 0; i < size; i++) outfile << X[i] << " " << Y[i] << endl;

  outfile.close();
-} 
+}

 #endif
--- a/bench/btl/libs/BLAS/blas.h
+++ b/bench/btl/libs/BLAS/blas.h
--- a/bench/btl/libs/BLAS/blas_interface.hh
+++ b/bench/btl/libs/BLAS/blas_interface.hh
@ -22,24 +22,27 @@

 #include <c_interface_base.h>
 #include <complex>
-extern "C"
-{
+extern "C" {
 #include "blas.h"

-  // Cholesky Factorization
+// Cholesky Factorization
 //   void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info);
 //   void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
-  void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work, int *lwork, int *info );
-  void dsytrd_(char *uplo, const int *n, double *a, const int *lda, double *d, double *e, double *tau, double *work, int *lwork, int *info );
-  void sgehrd_( const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork, int *info );
-  void dgehrd_( const int *n, int *ilo, int *ihi, double *a, const int *lda, double *tau, double *work, int *lwork, int *info );
+void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work,
+             int *lwork, int *info);
+void dsytrd_(char *uplo, const int *n, double *a, const int *lda, double *d, double *e, double *tau, double *work,
+             int *lwork, int *info);
+void sgehrd_(const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork,
+             int *info);
+void dgehrd_(const int *n, int *ilo, int *ihi, double *a, const int *lda, double *tau, double *work, int *lwork,
+             int *info);

-  // LU row pivoting
+// LU row pivoting
 //   void dgetrf_( int *m, int *n, double *a, int *lda, int *ipiv, int *info );
 //   void sgetrf_(const int* m, const int* n, float *a, const int* ld, int* ipivot, int* info);
-  // LU full pivoting
-  void sgetc2_(const int* n, float *a, const int *lda, int *ipiv, int *jpiv, int*info );
-  void dgetc2_(const int* n, double *a, const int *lda, int *ipiv, int *jpiv, int*info );
+// LU full pivoting
+void sgetc2_(const int *n, float *a, const int *lda, int *ipiv, int *jpiv, int *info);
+void dgetc2_(const int *n, double *a, const int *lda, int *ipiv, int *jpiv, int *info);
 #ifdef HAS_LAPACK
 #endif
 }
@ -47,12 +50,11 @@ extern "C"
 #define MAKE_STRING2(S) #S
 #define MAKE_STRING(S) MAKE_STRING2(S)

-#define CAT2(A,B) A##B
-#define CAT(A,B) CAT2(A,B)
-
-
-template<class real> class blas_interface;
+#define CAT2(A, B) A##B
+#define CAT(A, B) CAT2(A, B)

+template <class real>
+class blas_interface;

 static char notrans = 'N';
 static char trans = 'T';
@ -62,22 +64,16 @@ static char right = 'R';
 static char left = 'L';
 static int intone = 1;

-
-
-#define SCALAR        float
+#define SCALAR float
 #define SCALAR_PREFIX s
 #include "blas_interface_impl.hh"
 #undef SCALAR
 #undef SCALAR_PREFIX

-
-#define SCALAR        double
+#define SCALAR double
 #define SCALAR_PREFIX d
 #include "blas_interface_impl.hh"
 #undef SCALAR
 #undef SCALAR_PREFIX

 #endif
-
-
-
--- a/bench/btl/libs/BLAS/blas_interface_impl.hh
+++ b/bench/btl/libs/BLAS/blas_interface_impl.hh
@ -1,116 +1,109 @@

-#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_)
+#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX, NAME), _)

-template<> class blas_interface<SCALAR> : public c_interface_base<SCALAR>
-{
-
-public :
-  
+template <>
+class blas_interface<SCALAR> : public c_interface_base<SCALAR> {
+ public:
  static SCALAR fone;
  static SCALAR fzero;

-  static inline std::string name()
-  {
-    return MAKE_STRING(CBLASNAME);
+  static inline std::string name() { return MAKE_STRING(CBLASNAME); }
+
+  static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    BLAS_FUNC(gemv)(&notrans, &N, &N, &fone, A, &N, B, &intone, &fzero, X, &intone);
  }

-  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    BLAS_FUNC(gemv)(&notrans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
+  static inline void symv(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    BLAS_FUNC(symv)(&lower, &N, &fone, A, &N, B, &intone, &fzero, X, &intone);
  }

-  static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    BLAS_FUNC(symv)(&lower, &N,&fone,A,&N,B,&intone,&fzero,X,&intone);
+  static inline void syr2(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    BLAS_FUNC(syr2)(&lower, &N, &fone, B, &intone, X, &intone, A, &N);
  }

-  static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    BLAS_FUNC(syr2)(&lower,&N,&fone,B,&intone,X,&intone,A,&N);
+  static inline void ger(gene_matrix& A, gene_vector& X, gene_vector& Y, int N) {
+    BLAS_FUNC(ger)(&N, &N, &fone, X, &intone, Y, &intone, A, &N);
  }

-  static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){
-    BLAS_FUNC(ger)(&N,&N,&fone,X,&intone,Y,&intone,A,&N);
+  static inline void rot(gene_vector& A, gene_vector& B, SCALAR c, SCALAR s, int N) {
+    BLAS_FUNC(rot)(&N, A, &intone, B, &intone, &c, &s);
  }

-  static inline void rot(gene_vector & A,  gene_vector & B, SCALAR c, SCALAR s, int N){
-    BLAS_FUNC(rot)(&N,A,&intone,B,&intone,&c,&s);
+  static inline void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    BLAS_FUNC(gemv)(&trans, &N, &N, &fone, A, &N, B, &intone, &fzero, X, &intone);
  }

-  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    BLAS_FUNC(gemv)(&trans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
+  static inline void matrix_matrix_product(gene_matrix& A, gene_matrix& B, gene_matrix& X, int N) {
+    BLAS_FUNC(gemm)(&notrans, &notrans, &N, &N, &N, &fone, A, &N, B, &N, &fzero, X, &N);
  }

-  static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
-    BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
+  static inline void transposed_matrix_matrix_product(gene_matrix& A, gene_matrix& B, gene_matrix& X, int N) {
+    BLAS_FUNC(gemm)(&notrans, &notrans, &N, &N, &N, &fone, A, &N, B, &N, &fzero, X, &N);
  }

-  static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
-    BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
+  static inline void ata_product(gene_matrix& A, gene_matrix& X, int N) {
+    BLAS_FUNC(syrk)(&lower, &trans, &N, &N, &fone, A, &N, &fzero, X, &N);
  }

-  static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
-    BLAS_FUNC(syrk)(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N);
+  static inline void aat_product(gene_matrix& A, gene_matrix& X, int N) {
+    BLAS_FUNC(syrk)(&lower, &notrans, &N, &N, &fone, A, &N, &fzero, X, &N);
  }

-  static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
-    BLAS_FUNC(syrk)(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N);
+  static inline void axpy(SCALAR coef, const gene_vector& X, gene_vector& Y, int N) {
+    BLAS_FUNC(axpy)(&N, &coef, X, &intone, Y, &intone);
  }

-  static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){
-    BLAS_FUNC(axpy)(&N,&coef,X,&intone,Y,&intone);
+  static inline void axpby(SCALAR a, const gene_vector& X, SCALAR b, gene_vector& Y, int N) {
+    BLAS_FUNC(scal)(&N, &b, Y, &intone);
+    BLAS_FUNC(axpy)(&N, &a, X, &intone, Y, &intone);
  }

-  static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){
-    BLAS_FUNC(scal)(&N,&b,Y,&intone);
-    BLAS_FUNC(axpy)(&N,&a,X,&intone,Y,&intone);
-  }
-
-  static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
-    int N2 = N*N;
+  static inline void cholesky(const gene_matrix& X, gene_matrix& C, int N) {
+    int N2 = N * N;
    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
    char uplo = 'L';
    int info = 0;
    BLAS_FUNC(potrf)(&uplo, &N, C, &N, &info);
-    if(info!=0) std::cerr << "potrf_ error " << info << "\n";
+    if (info != 0) std::cerr << "potrf_ error " << info << "\n";
  }

-  static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
-    int N2 = N*N;
+  static inline void partial_lu_decomp(const gene_matrix& X, gene_matrix& C, int N) {
+    int N2 = N * N;
    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
    int info = 0;
-    int * ipiv = (int*)alloca(sizeof(int)*N);
+    int* ipiv = (int*)alloca(sizeof(int) * N);
    BLAS_FUNC(getrf)(&N, &N, C, &N, ipiv, &info);
-    if(info!=0) std::cerr << "getrf_ error " << info << "\n";
+    if (info != 0) std::cerr << "getrf_ error " << info << "\n";
  }
-  
-  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+
+  static inline void trisolve_lower(const gene_matrix& L, const gene_vector& B, gene_vector& X, int N) {
    BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
    BLAS_FUNC(trsv)(&lower, &notrans, &nonunit, &N, L, &N, X, &intone);
  }

-  static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){
+  static inline void trisolve_lower_matrix(const gene_matrix& L, const gene_matrix& B, gene_matrix& X, int N) {
    BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
    BLAS_FUNC(trsm)(&right, &lower, &notrans, &nonunit, &N, &N, &fone, L, &N, X, &N);
  }

-  static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & /*X*/, int N){
-    BLAS_FUNC(trmm)(&left, &lower, &notrans,&nonunit, &N,&N,&fone,A,&N,B,&N);
+  static inline void trmm(gene_matrix& A, gene_matrix& B, gene_matrix& /*X*/, int N) {
+    BLAS_FUNC(trmm)(&left, &lower, &notrans, &nonunit, &N, &N, &fone, A, &N, B, &N);
  }

-  #ifdef HAS_LAPACK
+#ifdef HAS_LAPACK

-  static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
-    int N2 = N*N;
+  static inline void lu_decomp(const gene_matrix& X, gene_matrix& C, int N) {
+    int N2 = N * N;
    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
    int info = 0;
-    int * ipiv = (int*)alloca(sizeof(int)*N);
-    int * jpiv = (int*)alloca(sizeof(int)*N);
+    int* ipiv = (int*)alloca(sizeof(int) * N);
+    int* jpiv = (int*)alloca(sizeof(int) * N);
    BLAS_FUNC(getc2)(&N, C, &N, ipiv, jpiv, &info);
  }

-
-
-  static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
+  static inline void hessenberg(const gene_matrix& X, gene_matrix& C, int N) {
    {
-      int N2 = N*N;
+      int N2 = N * N;
      int inc = 1;
      BLAS_FUNC(copy)(&N2, X, &inc, C, &inc);
    }
@ -118,29 +111,28 @@ public :
    int ilo = 1;
    int ihi = N;
    int bsize = 64;
-    int worksize = N*bsize;
-    SCALAR* d = new SCALAR[N+worksize];
-    BLAS_FUNC(gehrd)(&N, &ilo, &ihi, C, &N, d, d+N, &worksize, &info);
+    int worksize = N * bsize;
+    SCALAR* d = new SCALAR[N + worksize];
+    BLAS_FUNC(gehrd)(&N, &ilo, &ihi, C, &N, d, d + N, &worksize, &info);
    delete[] d;
  }

-  static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
+  static inline void tridiagonalization(const gene_matrix& X, gene_matrix& C, int N) {
    {
-      int N2 = N*N;
+      int N2 = N * N;
      int inc = 1;
      BLAS_FUNC(copy)(&N2, X, &inc, C, &inc);
    }
    char uplo = 'U';
    int info = 0;
    int bsize = 64;
-    int worksize = N*bsize;
-    SCALAR* d = new SCALAR[3*N+worksize];
-    BLAS_FUNC(sytrd)(&uplo, &N, C, &N, d, d+N, d+2*N, d+3*N, &worksize, &info);
+    int worksize = N * bsize;
+    SCALAR* d = new SCALAR[3 * N + worksize];
+    BLAS_FUNC(sytrd)(&uplo, &N, C, &N, d, d + N, d + 2 * N, d + 3 * N, &worksize, &info);
    delete[] d;
  }
-  
-  #endif // HAS_LAPACK

+#endif  // HAS_LAPACK
 };

 SCALAR blas_interface<SCALAR>::fone = SCALAR(1);
--- a/bench/btl/libs/BLAS/c_interface_base.h
+++ b/bench/btl/libs/BLAS/c_interface_base.h
@ -5,69 +5,57 @@
 #include "utilities.h"
 #include <vector>

-template<class real> class c_interface_base
-{
-
-public:
-
-  typedef real                      real_type;
-  typedef std::vector<real>         stl_vector;
-  typedef std::vector<stl_vector >  stl_matrix;
+template <class real>
+class c_interface_base {
+ public:
+  typedef real real_type;
+  typedef std::vector<real> stl_vector;
+  typedef std::vector<stl_vector> stl_matrix;

  typedef real* gene_matrix;
  typedef real* gene_vector;

-  static void free_matrix(gene_matrix & A, int /*N*/){
-    delete[] A;
-  }
+  static void free_matrix(gene_matrix& A, int /*N*/) { delete[] A; }

-  static void free_vector(gene_vector & B){
-    delete[] B;
-  }
+  static void free_vector(gene_vector& B) { delete[] B; }

-  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
+  static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
    int N = A_stl.size();
-    A = new real[N*N];
-    for (int j=0;j<N;j++)
-      for (int i=0;i<N;i++)
-        A[i+N*j] = A_stl[j][i];
+    A = new real[N * N];
+    for (int j = 0; j < N; j++)
+      for (int i = 0; i < N; i++) A[i + N * j] = A_stl[j][i];
  }

-  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
+  static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) {
    int N = B_stl.size();
    B = new real[N];
-    for (int i=0;i<N;i++)
-      B[i] = B_stl[i];
+    for (int i = 0; i < N; i++) B[i] = B_stl[i];
  }

-  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
+  static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) {
    int N = B_stl.size();
-    for (int i=0;i<N;i++)
-      B_stl[i] = B[i];
+    for (int i = 0; i < N; i++) B_stl[i] = B[i];
  }

-  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
+  static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
    int N = A_stl.size();
-    for (int j=0;j<N;j++){
+    for (int j = 0; j < N; j++) {
      A_stl[j].resize(N);
-      for (int i=0;i<N;i++)
-        A_stl[j][i] = A[i+N*j];
+      for (int i = 0; i < N; i++) A_stl[j][i] = A[i + N * j];
    }
  }

-  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
-    for (int i=0;i<N;i++)
-      cible[i]=source[i];
+  static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) {
+    for (int i = 0; i < N; i++) cible[i] = source[i];
  }

-  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
-    for (int j=0;j<N;j++){
-      for (int i=0;i<N;i++){
-        cible[i+N*j] = source[i+N*j];
+  static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) {
+    for (int j = 0; j < N; j++) {
+      for (int i = 0; i < N; i++) {
+        cible[i + N * j] = source[i + N * j];
      }
    }
  }
-
 };

 #endif
--- a/bench/btl/libs/BLAS/main.cpp
+++ b/bench/btl/libs/BLAS/main.cpp
@ -33,41 +33,37 @@

 BTL_MAIN;

-int main()
-{
+int main() {
+  bench<Action_axpy<blas_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
+  bench<Action_axpby<blas_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

-  bench<Action_axpy<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  bench<Action_axpby<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  bench<Action_matrix_vector_product<blas_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_atv_product<blas_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_symv<blas_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_syr2<blas_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);

-  bench<Action_matrix_vector_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_atv_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_symv<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_syr2<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+  bench<Action_ger<blas_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

-  bench<Action_ger<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

-  bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_trisolve_matrix<blas_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

-  bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_trisolve_matrix<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_trmm<blas_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

-  bench<Action_trmm<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_cholesky<blas_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);
+  bench<Action_partial_lu<blas_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);

-  bench<Action_cholesky<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-  bench<Action_partial_lu<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
+#ifdef HAS_LAPACK
+  //   bench<Action_lu_decomp<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
+  bench<Action_hessenberg<blas_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);
+  bench<Action_tridiagonalization<blas_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);
+#endif

-  #ifdef HAS_LAPACK
-//   bench<Action_lu_decomp<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-  bench<Action_hessenberg<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-  bench<Action_tridiagonalization<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-  #endif
-
-  //bench<Action_lu_solve<blas_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
+  // bench<Action_lu_solve<blas_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/STL/STL_interface.hh
+++ b/bench/btl/libs/STL/STL_interface.hh
@ -25,223 +25,178 @@

 using namespace std;

-template<class real>
-class STL_interface{
+template <class real>
+class STL_interface {
+ public:
+  typedef real real_type;

-public :
-
-  typedef real real_type ;
-
-  typedef std::vector<real>  stl_vector;
-  typedef std::vector<stl_vector > stl_matrix;
+  typedef std::vector<real> stl_vector;
+  typedef std::vector<stl_vector> stl_matrix;

  typedef stl_matrix gene_matrix;

  typedef stl_vector gene_vector;

-  static inline std::string name( void )
-  {
-    return "STL";
-  }
+  static inline std::string name(void) { return "STL"; }

-  static void free_matrix(gene_matrix & /*A*/, int /*N*/){}
+  static void free_matrix(gene_matrix& /*A*/, int /*N*/) {}

-  static void free_vector(gene_vector & /*B*/){}
+  static void free_vector(gene_vector& /*B*/) {}

-  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
-    A = A_stl;
-  }
+  static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) { A = A_stl; }

-  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
-    B = B_stl;
-  }
+  static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) { B = B_stl; }

-  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
-    B_stl = B ;
-  }
+  static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) { B_stl = B; }

+  static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) { A_stl = A; }

-  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
-    A_stl = A ;
-  }
-
-  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
-    for (int i=0;i<N;i++){
-      cible[i]=source[i];
+  static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) {
+    for (int i = 0; i < N; i++) {
+      cible[i] = source[i];
    }
  }

-
-  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
-    for (int i=0;i<N;i++)
-      for (int j=0;j<N;j++)
-        cible[i][j]=source[i][j];
+  static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) {
+    for (int i = 0; i < N; i++)
+      for (int j = 0; j < N; j++) cible[i][j] = source[i][j];
  }

-  static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
-  {
+  static inline void ata_product(const gene_matrix& A, gene_matrix& X, int N) {
    real somme;
-    for (int j=0;j<N;j++){
-      for (int i=0;i<N;i++){
-        somme=0.0;
-        if(i>=j)
-        {
-        for (int k=0;k<N;k++)
-          somme += A[i][k]*A[j][k];
-        X[j][i]=somme;
-	}
-      }
-    }
-  }
-
-  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
-  {
-    real somme;
-    for (int j=0;j<N;j++){
-      for (int i=0;i<N;i++){
-        somme=0.0;
-        if(i>=j)
-        {
-          for (int k=0;k<N;k++){
-            somme+=A[k][i]*A[k][j];
-          }
-          X[j][i]=somme;
+    for (int j = 0; j < N; j++) {
+      for (int i = 0; i < N; i++) {
+        somme = 0.0;
+        if (i >= j) {
+          for (int k = 0; k < N; k++) somme += A[i][k] * A[j][k];
+          X[j][i] = somme;
        }
      }
    }
  }

-
-  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
-  {
+  static inline void aat_product(const gene_matrix& A, gene_matrix& X, int N) {
    real somme;
-    for (int j=0;j<N;j++){
-      for (int i=0;i<N;i++){
-        somme=0.0;
-        for (int k=0;k<N;k++)
-          somme+=A[k][i]*B[j][k];
-        X[j][i]=somme;
+    for (int j = 0; j < N; j++) {
+      for (int i = 0; i < N; i++) {
+        somme = 0.0;
+        if (i >= j) {
+          for (int k = 0; k < N; k++) {
+            somme += A[k][i] * A[k][j];
+          }
+          X[j][i] = somme;
+        }
      }
    }
  }

-  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
-  {
+  static inline void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X, int N) {
    real somme;
-    for (int i=0;i<N;i++){
-      somme=0.0;
-      for (int j=0;j<N;j++)
-        somme+=A[j][i]*B[j];
-      X[i]=somme;
+    for (int j = 0; j < N; j++) {
+      for (int i = 0; i < N; i++) {
+        somme = 0.0;
+        for (int k = 0; k < N; k++) somme += A[k][i] * B[j][k];
+        X[j][i] = somme;
+      }
    }
  }

-  static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
-  {
-    for (int j=0; j<N; ++j)
-      X[j] = 0;
-    for (int j=0; j<N; ++j)
-    {
+  static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    real somme;
+    for (int i = 0; i < N; i++) {
+      somme = 0.0;
+      for (int j = 0; j < N; j++) somme += A[j][i] * B[j];
+      X[i] = somme;
+    }
+  }
+
+  static inline void symv(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    for (int j = 0; j < N; ++j) X[j] = 0;
+    for (int j = 0; j < N; ++j) {
      real t1 = B[j];
      real t2 = 0;
      X[j] += t1 * A[j][j];
-      for (int i=j+1; i<N; ++i) {
+      for (int i = j + 1; i < N; ++i) {
        X[i] += t1 * A[j][i];
        t2 += A[j][i] * B[i];
      }
      X[j] += t2;
    }
  }
-  
-  static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
-  {
-    for (int j=0; j<N; ++j)
-    {
-      for (int i=j; i<N; ++i)
-        A[j][i] += B[i]*X[j] + B[j]*X[i];
+
+  static inline void syr2(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    for (int j = 0; j < N; ++j) {
+      for (int i = j; i < N; ++i) A[j][i] += B[i] * X[j] + B[j] * X[i];
    }
  }

-  static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N)
-  {
-    for (int j=0; j<N; ++j)
-    {
-      for (int i=j; i<N; ++i)
-        A[j][i] += X[i]*Y[j];
+  static inline void ger(gene_matrix& A, gene_vector& X, gene_vector& Y, int N) {
+    for (int j = 0; j < N; ++j) {
+      for (int i = j; i < N; ++i) A[j][i] += X[i] * Y[j];
    }
  }

-  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
-  {
+  static inline void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
    real somme;
-    for (int i=0;i<N;i++){
+    for (int i = 0; i < N; i++) {
      somme = 0.0;
-      for (int j=0;j<N;j++)
-        somme += A[i][j]*B[j];
+      for (int j = 0; j < N; j++) somme += A[i][j] * B[j];
      X[i] = somme;
    }
  }

-  static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
-    for (int i=0;i<N;i++)
-      Y[i]+=coef*X[i];
+  static inline void axpy(real coef, const gene_vector& X, gene_vector& Y, int N) {
+    for (int i = 0; i < N; i++) Y[i] += coef * X[i];
  }

-  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
-    for (int i=0;i<N;i++)
-      Y[i] = a*X[i] + b*Y[i];
+  static inline void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int N) {
+    for (int i = 0; i < N; i++) Y[i] = a * X[i] + b * Y[i];
  }

-  static inline void trisolve_lower(const gene_matrix & L, const gene_vector & B, gene_vector & X, int N){
-    copy_vector(B,X,N);
-    for(int i=0; i<N; ++i)
-    {
+  static inline void trisolve_lower(const gene_matrix& L, const gene_vector& B, gene_vector& X, int N) {
+    copy_vector(B, X, N);
+    for (int i = 0; i < N; ++i) {
      X[i] /= L[i][i];
      real tmp = X[i];
-      for (int j=i+1; j<N; ++j)
-        X[j] -= tmp * L[i][j];
+      for (int j = i + 1; j < N; ++j) X[j] -= tmp * L[i][j];
    }
  }

-  static inline real norm_diff(const stl_vector & A, const stl_vector & B)
-  {
-    int N=A.size();
-    real somme=0.0;
-    real somme2=0.0;
+  static inline real norm_diff(const stl_vector& A, const stl_vector& B) {
+    int N = A.size();
+    real somme = 0.0;
+    real somme2 = 0.0;

-    for (int i=0;i<N;i++){
-      real diff=A[i]-B[i];
-      somme+=diff*diff;
-      somme2+=A[i]*A[i];
+    for (int i = 0; i < N; i++) {
+      real diff = A[i] - B[i];
+      somme += diff * diff;
+      somme2 += A[i] * A[i];
    }
-    return somme/somme2;
+    return somme / somme2;
  }

-  static inline real norm_diff(const stl_matrix & A, const stl_matrix & B)
-  {
-    int N=A[0].size();
-    real somme=0.0;
-    real somme2=0.0;
+  static inline real norm_diff(const stl_matrix& A, const stl_matrix& B) {
+    int N = A[0].size();
+    real somme = 0.0;
+    real somme2 = 0.0;

-    for (int i=0;i<N;i++){
-      for (int j=0;j<N;j++){
-        real diff=A[i][j] - B[i][j];
-        somme += diff*diff;
-        somme2 += A[i][j]*A[i][j];
+    for (int i = 0; i < N; i++) {
+      for (int j = 0; j < N; j++) {
+        real diff = A[i][j] - B[i][j];
+        somme += diff * diff;
+        somme2 += A[i][j] * A[i][j];
      }
    }

-    return somme/somme2;
+    return somme / somme2;
  }

-  static inline void display_vector(const stl_vector & A)
-  {
-    int N=A.size();
-    for (int i=0;i<N;i++){
-      INFOS("A["<<i<<"]="<<A[i]<<endl);
+  static inline void display_vector(const stl_vector& A) {
+    int N = A.size();
+    for (int i = 0; i < N; i++) {
+      INFOS("A[" << i << "]=" << A[i] << endl);
    }
  }
-
 };

 #endif
--- a/bench/btl/libs/STL/main.cpp
+++ b/bench/btl/libs/STL/main.cpp
@ -24,19 +24,16 @@

 BTL_MAIN;

-int main()
-{
-  bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_symv<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_syr2<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_ata_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_aat_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+int main() {
+  bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
+  bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
+  bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_symv<STL_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_syr2<STL_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_ata_product<STL_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_aat_product<STL_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/blaze/blaze_interface.hh
+++ b/bench/btl/libs/blaze/blaze_interface.hh
@ -25,117 +25,102 @@

 #include <vector>

-template<class real>
+template <class real>
 class blaze_interface {
+ public:
+  typedef real real_type;

-public :
+  typedef std::vector<real> stl_vector;
+  typedef std::vector<stl_vector> stl_matrix;

-  typedef real real_type ;
-
-  typedef std::vector<real>        stl_vector;
-  typedef std::vector<stl_vector > stl_matrix;
-
-  typedef blaze::DynamicMatrix<real,blaze::columnMajor>  gene_matrix;
-  typedef blaze::DynamicVector<real>  gene_vector;
+  typedef blaze::DynamicMatrix<real, blaze::columnMajor> gene_matrix;
+  typedef blaze::DynamicVector<real> gene_vector;

  static inline std::string name() { return "blaze"; }

-  static void free_matrix(gene_matrix & A, int N){
-    return ;
-  }
+  static void free_matrix(gene_matrix& A, int N) { return; }

-  static void free_vector(gene_vector & B){
-    return ;
-  }
+  static void free_vector(gene_vector& B) { return; }

-  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
+  static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
    A.resize(A_stl[0].size(), A_stl.size());

-    for (int j=0; j<A_stl.size() ; j++){
-      for (int i=0; i<A_stl[j].size() ; i++){
-        A(i,j) = A_stl[j][i];
+    for (int j = 0; j < A_stl.size(); j++) {
+      for (int i = 0; i < A_stl[j].size(); i++) {
+        A(i, j) = A_stl[j][i];
      }
    }
  }

-  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
+  static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) {
    B.resize(B_stl.size());
-    for (int i=0; i<B_stl.size() ; i++){
+    for (int i = 0; i < B_stl.size(); i++) {
      B[i] = B_stl[i];
    }
  }

-  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
-    for (int i=0; i<B_stl.size() ; i++){
+  static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) {
+    for (int i = 0; i < B_stl.size(); i++) {
      B_stl[i] = B[i];
    }
  }

-  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
-    int N=A_stl.size();
-    for (int j=0;j<N;j++){
+  static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
+    int N = A_stl.size();
+    for (int j = 0; j < N; j++) {
      A_stl[j].resize(N);
-      for (int i=0;i<N;i++){
-        A_stl[j][i] = A(i,j);
+      for (int i = 0; i < N; i++) {
+        A_stl[j][i] = A(i, j);
      }
    }
  }

-  static EIGEN_DONT_INLINE void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    X = (A*B);
+  static EIGEN_DONT_INLINE void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X,
+                                                      int N) {
+    X = (A * B);
  }

-  static EIGEN_DONT_INLINE void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    X = (trans(A)*trans(B));
+  static EIGEN_DONT_INLINE void transposed_matrix_matrix_product(const gene_matrix& A, const gene_matrix& B,
+                                                                 gene_matrix& X, int N) {
+    X = (trans(A) * trans(B));
  }

-  static EIGEN_DONT_INLINE void ata_product(const gene_matrix & A, gene_matrix & X, int N){
-    X = (trans(A)*A);
+  static EIGEN_DONT_INLINE void ata_product(const gene_matrix& A, gene_matrix& X, int N) { X = (trans(A) * A); }
+
+  static EIGEN_DONT_INLINE void aat_product(const gene_matrix& A, gene_matrix& X, int N) { X = (A * trans(A)); }
+
+  static EIGEN_DONT_INLINE void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    X = (A * B);
  }

-  static EIGEN_DONT_INLINE void aat_product(const gene_matrix & A, gene_matrix & X, int N){
-    X = (A*trans(A));
+  static EIGEN_DONT_INLINE void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    X = (trans(A) * B);
  }

-  static EIGEN_DONT_INLINE void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    X = (A*B);
+  static EIGEN_DONT_INLINE void axpy(const real coef, const gene_vector& X, gene_vector& Y, int N) { Y += coef * X; }
+
+  static EIGEN_DONT_INLINE void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int N) {
+    Y = a * X + b * Y;
  }

-  static EIGEN_DONT_INLINE void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    X = (trans(A)*B);
-  }
+  //   static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
+  //     C = X;
+  //     recursive_cholesky(C);
+  //   }

-  static EIGEN_DONT_INLINE void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
-    Y += coef * X;
-  }
+  //   static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
+  //     R = X;
+  //     std::vector<int> ipvt(N);
+  //     lu_factor(R, ipvt);
+  //   }

-  static EIGEN_DONT_INLINE void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
-    Y = a*X + b*Y;
-  }
+  //   static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+  //     X = lower_trisolve(L, B);
+  //   }

-//   static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
-//     C = X;
-//     recursive_cholesky(C);
-//   }
-
-//   static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
-//     R = X;
-//     std::vector<int> ipvt(N);
-//     lu_factor(R, ipvt);
-//   }
-
-//   static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
-//     X = lower_trisolve(L, B);
-//   }
-
-  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
-    cible = source;
-  }
-
-  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
-    cible = source;
-  }
+  static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) { cible = source; }

+  static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) { cible = source; }
 };

 #endif
--- a/bench/btl/libs/blaze/main.cpp
+++ b/bench/btl/libs/blaze/main.cpp
@ -22,19 +22,15 @@

 BTL_MAIN;

-int main()
-{
+int main() {
+  bench<Action_axpy<blaze_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
+  bench<Action_axpby<blaze_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

-  bench<Action_axpy<blaze_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  bench<Action_axpby<blaze_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-
-  bench<Action_matrix_vector_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_atv_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_matrix_vector_product<blaze_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_atv_product<blaze_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/blitz/blitz_LU_solve_interface.hh
+++ b/bench/btl/libs/blitz/blitz_LU_solve_interface.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  blitz_LU_solve_interface.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:31 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef BLITZ_LU_SOLVE_INTERFACE_HH
 #define BLITZ_LU_SOLVE_INTERFACE_HH

@ -25,168 +25,136 @@

 BZ_USING_NAMESPACE(blitz)

-template<class real>
-class blitz_LU_solve_interface : public blitz_interface<real>
-{
-
-public :
-
+template <class real>
+class blitz_LU_solve_interface : public blitz_interface<real> {
+ public:
  typedef typename blitz_interface<real>::gene_matrix gene_matrix;
  typedef typename blitz_interface<real>::gene_vector gene_vector;

-  typedef blitz::Array<int,1> Pivot_Vector;
+  typedef blitz::Array<int, 1> Pivot_Vector;

-  inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N)
-  {
+  inline static void new_Pivot_Vector(Pivot_Vector &pivot, int N) { pivot.resize(N); }

-    pivot.resize(N);
+  inline static void free_Pivot_Vector(Pivot_Vector &pivot) { return; }

-  }
+  static inline real matrix_vector_product_sliced(const gene_matrix &A, gene_vector B, int row, int col_start,
+                                                  int col_end) {
+    real somme = 0.;

-  inline static void free_Pivot_Vector(Pivot_Vector & pivot)
-  {
-    
-    return;
-
-  }
-
-
-  static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end)
-  {
-    
-    real somme=0.;
-    
-    for (int j=col_start ; j<col_end+1 ; j++){
-	
-	somme+=A(row,j)*B(j);
-	
+    for (int j = col_start; j < col_end + 1; j++) {
+      somme += A(row, j) * B(j);
    }

    return somme;
-
  }

+  static inline real matrix_matrix_product_sliced(gene_matrix &A, int row, int col_start, int col_end, gene_matrix &B,
+                                                  int row_shift, int col) {
+    real somme = 0.;

-
-
-  static inline real matrix_matrix_product_sliced(gene_matrix & A, int row, int col_start, int col_end, gene_matrix & B, int row_shift, int col )
-  {
-    
-    real somme=0.;
-    
-    for (int j=col_start ; j<col_end+1 ; j++){
-	
-	somme+=A(row,j)*B(j+row_shift,col);
-	
+    for (int j = col_start; j < col_end + 1; j++) {
+      somme += A(row, j) * B(j + row_shift, col);
    }

    return somme;
-
  }

-  inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
-  {
-
-    ASSERT( LU.rows()==LU.cols() ) ;
-    int index_max = 0 ;
-    real big = 0. ;
-    real theSum = 0. ;
-    real dum = 0. ;
+  inline static void LU_factor(gene_matrix &LU, Pivot_Vector &pivot, int N) {
+    ASSERT(LU.rows() == LU.cols());
+    int index_max = 0;
+    real big = 0.;
+    real theSum = 0.;
+    real dum = 0.;
    // Get the implicit scaling information :
-    gene_vector ImplicitScaling( N ) ;
-    for( int i=0; i<N; i++ ) {
-      big = 0. ;
-      for( int j=0; j<N; j++ ) {
-	if( abs( LU( i, j ) )>=big ) big = abs( LU( i, j ) ) ;
+    gene_vector ImplicitScaling(N);
+    for (int i = 0; i < N; i++) {
+      big = 0.;
+      for (int j = 0; j < N; j++) {
+        if (abs(LU(i, j)) >= big) big = abs(LU(i, j));
      }
-      if( big==0. ) {
-	INFOS( "blitz_LU_factor::Singular matrix" ) ;
-	exit( 0 ) ;
+      if (big == 0.) {
+        INFOS("blitz_LU_factor::Singular matrix");
+        exit(0);
      }
-      ImplicitScaling( i ) = 1./big ;
+      ImplicitScaling(i) = 1. / big;
    }
    // Loop over columns of Crout's method :
-    for( int j=0; j<N; j++ ) {
-      for( int i=0; i<j; i++ ) {
-	theSum = LU( i, j ) ;
-	theSum -= matrix_matrix_product_sliced(LU, i, 0, i-1, LU, 0, j) ;
-	//	theSum -= sum( LU( i, Range( fromStart, i-1 ) )*LU( Range( fromStart, i-1 ), j ) ) ;
-	LU( i, j ) = theSum ;
+    for (int j = 0; j < N; j++) {
+      for (int i = 0; i < j; i++) {
+        theSum = LU(i, j);
+        theSum -= matrix_matrix_product_sliced(LU, i, 0, i - 1, LU, 0, j);
+        //	theSum -= sum( LU( i, Range( fromStart, i-1 ) )*LU( Range( fromStart, i-1 ), j ) ) ;
+        LU(i, j) = theSum;
      }
-      
+
      // Search for the largest pivot element :
-      big = 0. ;
-      for( int i=j; i<N; i++ ) {
-	theSum = LU( i, j ) ;
-	theSum -= matrix_matrix_product_sliced(LU, i, 0, j-1, LU, 0, j) ;
-	//	theSum -= sum( LU( i, Range( fromStart, j-1 ) )*LU( Range( fromStart, j-1 ), j ) ) ;
-	LU( i, j ) = theSum ;
-	if( (ImplicitScaling( i )*abs( theSum ))>=big ) {
-	  dum = ImplicitScaling( i )*abs( theSum ) ;
-	  big = dum ;
-	  index_max = i ;
-	}
+      big = 0.;
+      for (int i = j; i < N; i++) {
+        theSum = LU(i, j);
+        theSum -= matrix_matrix_product_sliced(LU, i, 0, j - 1, LU, 0, j);
+        //	theSum -= sum( LU( i, Range( fromStart, j-1 ) )*LU( Range( fromStart, j-1 ), j ) ) ;
+        LU(i, j) = theSum;
+        if ((ImplicitScaling(i) * abs(theSum)) >= big) {
+          dum = ImplicitScaling(i) * abs(theSum);
+          big = dum;
+          index_max = i;
+        }
      }
      // Interchanging rows and the scale factor :
-      if( j!=index_max ) {
-	for( int k=0; k<N; k++ ) {
-	  dum = LU( index_max, k ) ;
-	  LU( index_max, k ) = LU( j, k ) ;
-	  LU( j, k ) = dum ;
-	}
-	ImplicitScaling( index_max ) = ImplicitScaling( j ) ;
+      if (j != index_max) {
+        for (int k = 0; k < N; k++) {
+          dum = LU(index_max, k);
+          LU(index_max, k) = LU(j, k);
+          LU(j, k) = dum;
+        }
+        ImplicitScaling(index_max) = ImplicitScaling(j);
      }
-      pivot( j ) = index_max ;
-      if ( LU( j, j )==0. ) LU( j, j ) = 1.e-20 ;
+      pivot(j) = index_max;
+      if (LU(j, j) == 0.) LU(j, j) = 1.e-20;
      // Divide by the pivot element :
-      if( j<N ) {
-	dum = 1./LU( j, j ) ;
-	for( int i=j+1; i<N; i++ ) LU( i, j ) *= dum ;
+      if (j < N) {
+        dum = 1. / LU(j, j);
+        for (int i = j + 1; i < N; i++) LU(i, j) *= dum;
      }
    }
-
  }

-  inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N)
-  {
-
+  inline static void LU_solve(const gene_matrix &LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N) {
    // Pour conserver le meme header, on travaille sur X, copie du second-membre B
-    X = B.copy() ;
-    ASSERT( LU.rows()==LU.cols() ) ;
-    firstIndex indI ;
+    X = B.copy();
+    ASSERT(LU.rows() == LU.cols());
+    firstIndex indI;
    // Forward substitution :
-    int ii = 0 ;
-    real theSum = 0. ;
-    for( int i=0; i<N; i++ ) {
-      int ip = pivot( i ) ;
-      theSum = X( ip ) ;
+    int ii = 0;
+    real theSum = 0.;
+    for (int i = 0; i < N; i++) {
+      int ip = pivot(i);
+      theSum = X(ip);
      //      theSum = B( ip ) ;
-      X( ip ) = X( i ) ;
+      X(ip) = X(i);
      //      B( ip ) = B( i ) ;
-      if( ii ) {
-	theSum -= matrix_vector_product_sliced(LU, X, i, ii-1, i-1) ;
-	//	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*X( Range( ii-1, i-1 ) ) ) ;
-	//	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*B( Range( ii-1, i-1 ) ) ) ;
-      } else if( theSum ) {
-	ii = i+1 ;
+      if (ii) {
+        theSum -= matrix_vector_product_sliced(LU, X, i, ii - 1, i - 1);
+        //	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*X( Range( ii-1, i-1 ) ) ) ;
+        //	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*B( Range( ii-1, i-1 ) ) ) ;
+      } else if (theSum) {
+        ii = i + 1;
      }
-      X( i ) = theSum ;
+      X(i) = theSum;
      //      B( i ) = theSum ;
    }
    // Backsubstitution :
-    for( int i=N-1; i>=0; i-- ) {
-      theSum = X( i ) ;
+    for (int i = N - 1; i >= 0; i--) {
+      theSum = X(i);
      //      theSum = B( i ) ;
-      theSum -= matrix_vector_product_sliced(LU, X, i, i+1, N) ;
+      theSum -= matrix_vector_product_sliced(LU, X, i, i + 1, N);
      //      theSum -= sum( LU( i, Range( i+1, toEnd ) )*X( Range( i+1, toEnd ) ) ) ;
      //      theSum -= sum( LU( i, Range( i+1, toEnd ) )*B( Range( i+1, toEnd ) ) ) ;
      // Store a component of the solution vector :
-      X( i ) = theSum/LU( i, i ) ;
+      X(i) = theSum / LU(i, i);
      //      B( i ) = theSum/LU( i, i ) ;
    }
-
  }
-
 };

 #endif
--- a/bench/btl/libs/blitz/blitz_interface.hh
+++ b/bench/btl/libs/blitz/blitz_interface.hh
@ -30,118 +30,108 @@

 BZ_USING_NAMESPACE(blitz)

-template<class real>
-class blitz_interface{
+template <class real>
+class blitz_interface {
+ public:
+  typedef real real_type;

-public :
+  typedef std::vector<real> stl_vector;
+  typedef std::vector<stl_vector> stl_matrix;

-  typedef real real_type ;
-
-  typedef std::vector<real>  stl_vector;
-  typedef std::vector<stl_vector > stl_matrix;
-
-  typedef blitz::Array<real, 2>  gene_matrix;
-  typedef blitz::Array<real, 1>  gene_vector;
-//   typedef blitz::Matrix<real, blitz::ColumnMajor>  gene_matrix;
-//   typedef blitz::Vector<real> gene_vector;
+  typedef blitz::Array<real, 2> gene_matrix;
+  typedef blitz::Array<real, 1> gene_vector;
+  //   typedef blitz::Matrix<real, blitz::ColumnMajor>  gene_matrix;
+  //   typedef blitz::Vector<real> gene_vector;

  static inline std::string name() { return "blitz"; }

-  static void free_matrix(gene_matrix & A, int N){}
+  static void free_matrix(gene_matrix& A, int N) {}

-  static void free_vector(gene_vector & B){}
+  static void free_vector(gene_vector& B) {}

-  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
-    A.resize(A_stl[0].size(),A_stl.size());
-    for (int j=0; j<A_stl.size() ; j++){
-      for (int i=0; i<A_stl[j].size() ; i++){
-        A(i,j)=A_stl[j][i];
+  static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
+    A.resize(A_stl[0].size(), A_stl.size());
+    for (int j = 0; j < A_stl.size(); j++) {
+      for (int i = 0; i < A_stl[j].size(); i++) {
+        A(i, j) = A_stl[j][i];
      }
    }
  }

-  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
+  static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) {
    B.resize(B_stl.size());
-    for (int i=0; i<B_stl.size() ; i++){
-      B(i)=B_stl[i];
+    for (int i = 0; i < B_stl.size(); i++) {
+      B(i) = B_stl[i];
    }
  }

-  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
-    for (int i=0; i<B_stl.size() ; i++){
-      B_stl[i]=B(i);
+  static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) {
+    for (int i = 0; i < B_stl.size(); i++) {
+      B_stl[i] = B(i);
    }
  }

-  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
-    int N=A_stl.size();
-    for (int j=0;j<N;j++){
+  static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
+    int N = A_stl.size();
+    for (int j = 0; j < N; j++) {
      A_stl[j].resize(N);
-      for (int i=0;i<N;i++)
-        A_stl[j][i] = A(i,j);
+      for (int i = 0; i < N; i++) A_stl[j][i] = A(i, j);
    }
  }

-  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
-  {
+  static inline void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X, int N) {
    firstIndex i;
    secondIndex j;
    thirdIndex k;
-    X = sum(A(i,k) * B(k,j), k);
+    X = sum(A(i, k) * B(k, j), k);
  }

-  static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
-  {
+  static inline void ata_product(const gene_matrix& A, gene_matrix& X, int N) {
    firstIndex i;
    secondIndex j;
    thirdIndex k;
-    X = sum(A(k,i) * A(k,j), k);
+    X = sum(A(k, i) * A(k, j), k);
  }

-  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
-  {
+  static inline void aat_product(const gene_matrix& A, gene_matrix& X, int N) {
    firstIndex i;
    secondIndex j;
    thirdIndex k;
-    X = sum(A(i,k) * A(j,k), k);
+    X = sum(A(i, k) * A(j, k), k);
  }

-  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
-  {
+  static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
    firstIndex i;
    secondIndex j;
-    X = sum(A(i,j)*B(j),j);
+    X = sum(A(i, j) * B(j), j);
  }

-  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
-  {
+  static inline void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
    firstIndex i;
    secondIndex j;
-    X = sum(A(j,i) * B(j),j);
+    X = sum(A(j, i) * B(j), j);
  }

-  static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N)
-  {
+  static inline void axpy(const real coef, const gene_vector& X, gene_vector& Y, int N) {
    firstIndex i;
    Y = Y(i) + coef * X(i);
-    //Y += coef * X;
+    // Y += coef * X;
  }

-  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
+  static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) {
    cible = source;
-    //cible.template operator=<gene_matrix>(source);
-//     for (int i=0;i<N;i++){
-//       for (int j=0;j<N;j++){
-//         cible(i,j)=source(i,j);
-//       }
-//     }
+    // cible.template operator=<gene_matrix>(source);
+    //     for (int i=0;i<N;i++){
+    //       for (int j=0;j<N;j++){
+    //         cible(i,j)=source(i,j);
+    //       }
+    //     }
  }

-  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
-    //cible.template operator=<gene_vector>(source);
+  static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) {
+    // cible.template operator=<gene_vector>(source);
    cible = source;
  }
-
 };

 #endif
--- a/bench/btl/libs/blitz/btl_blitz.cpp
+++ b/bench/btl/libs/blitz/btl_blitz.cpp
@ -31,21 +31,17 @@

 BTL_MAIN;

-int main()
-{
+int main() {
+  bench<Action_matrix_vector_product<blitz_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_atv_product<blitz_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);

-  bench<Action_matrix_vector_product<blitz_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_atv_product<blitz_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+  bench<Action_matrix_matrix_product<blitz_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_ata_product<blitz_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_aat_product<blitz_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

-  bench<Action_matrix_matrix_product<blitz_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_ata_product<blitz_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_aat_product<blitz_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_axpy<blitz_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

-  bench<Action_axpy<blitz_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-
-  //bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
+  // bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/blitz/btl_tiny_blitz.cpp
+++ b/bench/btl/libs/blitz/btl_tiny_blitz.cpp
@ -26,13 +26,10 @@

 BTL_MAIN;

-int main()
-{
-  bench_static<Action_axpy,tiny_blitz_interface>();
-  bench_static<Action_matrix_matrix_product,tiny_blitz_interface>();
-  bench_static<Action_matrix_vector_product,tiny_blitz_interface>();
+int main() {
+  bench_static<Action_axpy, tiny_blitz_interface>();
+  bench_static<Action_matrix_matrix_product, tiny_blitz_interface>();
+  bench_static<Action_matrix_vector_product, tiny_blitz_interface>();

  return 0;
 }
-
-
--- a/bench/btl/libs/blitz/tiny_blitz_interface.hh
+++ b/bench/btl/libs/blitz/tiny_blitz_interface.hh
@ -30,77 +30,62 @@

 BZ_USING_NAMESPACE(blitz)

-template<class real, int SIZE>
-class tiny_blitz_interface
-{
+template <class real, int SIZE>
+class tiny_blitz_interface {
+ public:
+  typedef real real_type;

-public :
+  typedef std::vector<real> stl_vector;
+  typedef std::vector<stl_vector> stl_matrix;

-  typedef real real_type ;
-
-  typedef std::vector<real>  stl_vector;
-  typedef std::vector<stl_vector > stl_matrix;
-
-  typedef TinyVector<real,SIZE> gene_vector;
-  typedef TinyMatrix<real,SIZE,SIZE> gene_matrix;
+  typedef TinyVector<real, SIZE> gene_vector;
+  typedef TinyMatrix<real, SIZE, SIZE> gene_matrix;

  static inline std::string name() { return "tiny_blitz"; }

-  static void free_matrix(gene_matrix & A, int N){}
+  static void free_matrix(gene_matrix& A, int N) {}

-  static void free_vector(gene_vector & B){}
+  static void free_vector(gene_vector& B) {}

-  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
-    for (int j=0; j<A_stl.size() ; j++)
-      for (int i=0; i<A_stl[j].size() ; i++)
-        A(i,j)=A_stl[j][i];
+  static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
+    for (int j = 0; j < A_stl.size(); j++)
+      for (int i = 0; i < A_stl[j].size(); i++) A(i, j) = A_stl[j][i];
  }

-  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
-    for (int i=0; i<B_stl.size() ; i++)
-      B(i) = B_stl[i];
+  static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) {
+    for (int i = 0; i < B_stl.size(); i++) B(i) = B_stl[i];
  }

-  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
-    for (int i=0; i<B_stl.size() ; i++)
-      B_stl[i] = B(i);
+  static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) {
+    for (int i = 0; i < B_stl.size(); i++) B_stl[i] = B(i);
  }

-  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
+  static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
    int N = A_stl.size();
-    for (int j=0;j<N;j++)
-    {
+    for (int j = 0; j < N; j++) {
      A_stl[j].resize(N);
-      for (int i=0;i<N;i++)
-        A_stl[j][i] = A(i,j);
+      for (int i = 0; i < N; i++) A_stl[j][i] = A(i, j);
    }
  }

-  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
-    for (int j=0;j<N;j++)
-      for (int i=0;i<N;i++)
-        cible(i,j) = source(i,j);
+  static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) {
+    for (int j = 0; j < N; j++)
+      for (int i = 0; i < N; i++) cible(i, j) = source(i, j);
  }

-  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
-    for (int i=0;i<N;i++){
+  static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) {
+    for (int i = 0; i < N; i++) {
      cible(i) = source(i);
    }
  }

-  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    X = product(A,B);
+  static inline void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X, int N) {
+    X = product(A, B);
  }

-  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    X = product(A,B);
-  }
-
-  static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
-    Y += coef * X;
-  }
+  static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) { X = product(A, B); }

+  static inline void axpy(const real coef, const gene_vector& X, gene_vector& Y, int N) { Y += coef * X; }
 };

-
 #endif
--- a/bench/btl/libs/eigen2/btl_tiny_eigen2.cpp
+++ b/bench/btl/libs/eigen2/btl_tiny_eigen2.cpp
@ -30,17 +30,13 @@

 BTL_MAIN;

-int main()
-{
-
-  bench_static<Action_axpy,eigen2_interface>();
-  bench_static<Action_matrix_matrix_product,eigen2_interface>();
-  bench_static<Action_matrix_vector_product,eigen2_interface>();
-  bench_static<Action_atv_product,eigen2_interface>();
-  bench_static<Action_cholesky,eigen2_interface>();
-  bench_static<Action_trisolve,eigen2_interface>();
+int main() {
+  bench_static<Action_axpy, eigen2_interface>();
+  bench_static<Action_matrix_matrix_product, eigen2_interface>();
+  bench_static<Action_matrix_vector_product, eigen2_interface>();
+  bench_static<Action_atv_product, eigen2_interface>();
+  bench_static<Action_cholesky, eigen2_interface>();
+  bench_static<Action_trisolve, eigen2_interface>();

  return 0;
 }
-
-
--- a/bench/btl/libs/eigen2/eigen2_interface.hh
+++ b/bench/btl/libs/eigen2/eigen2_interface.hh
@ -27,142 +27,133 @@

 using namespace Eigen;

-template<class real, int SIZE=Dynamic>
-class eigen2_interface
-{
-
-public :
-
-  enum {IsFixedSize = (SIZE!=Dynamic)};
+template <class real, int SIZE = Dynamic>
+class eigen2_interface {
+ public:
+  enum { IsFixedSize = (SIZE != Dynamic) };

  typedef real real_type;

  typedef std::vector<real> stl_vector;
  typedef std::vector<stl_vector> stl_matrix;

-  typedef Eigen::Matrix<real,SIZE,SIZE> gene_matrix;
-  typedef Eigen::Matrix<real,SIZE,1> gene_vector;
+  typedef Eigen::Matrix<real, SIZE, SIZE> gene_matrix;
+  typedef Eigen::Matrix<real, SIZE, 1> gene_vector;

-  static inline std::string name( void )
-  {
-    #if defined(EIGEN_VECTORIZE_SSE)
-    if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
-    #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
-    if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
-    #else
-    if (SIZE==Dynamic) return "eigen2_novec"; else return "tiny_eigen2_novec";
-    #endif
+  static inline std::string name(void) {
+#if defined(EIGEN_VECTORIZE_SSE)
+    if (SIZE == Dynamic)
+      return "eigen2";
+    else
+      return "tiny_eigen2";
+#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
+    if (SIZE == Dynamic)
+      return "eigen2";
+    else
+      return "tiny_eigen2";
+#else
+    if (SIZE == Dynamic)
+      return "eigen2_novec";
+    else
+      return "tiny_eigen2_novec";
+#endif
  }

-  static void free_matrix(gene_matrix & A, int N) {}
+  static void free_matrix(gene_matrix& A, int N) {}

-  static void free_vector(gene_vector & B) {}
+  static void free_vector(gene_vector& B) {}

-  static BTL_DONT_INLINE void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
+  static BTL_DONT_INLINE void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
    A.resize(A_stl[0].size(), A_stl.size());

-    for (int j=0; j<A_stl.size() ; j++){
-      for (int i=0; i<A_stl[j].size() ; i++){
-        A.coeffRef(i,j) = A_stl[j][i];
+    for (int j = 0; j < A_stl.size(); j++) {
+      for (int i = 0; i < A_stl[j].size(); i++) {
+        A.coeffRef(i, j) = A_stl[j][i];
      }
    }
  }

-  static BTL_DONT_INLINE  void vector_from_stl(gene_vector & B, stl_vector & B_stl){
-    B.resize(B_stl.size(),1);
+  static BTL_DONT_INLINE void vector_from_stl(gene_vector& B, stl_vector& B_stl) {
+    B.resize(B_stl.size(), 1);

-    for (int i=0; i<B_stl.size() ; i++){
+    for (int i = 0; i < B_stl.size(); i++) {
      B.coeffRef(i) = B_stl[i];
    }
  }

-  static BTL_DONT_INLINE  void vector_to_stl(gene_vector & B, stl_vector & B_stl){
-    for (int i=0; i<B_stl.size() ; i++){
+  static BTL_DONT_INLINE void vector_to_stl(gene_vector& B, stl_vector& B_stl) {
+    for (int i = 0; i < B_stl.size(); i++) {
      B_stl[i] = B.coeff(i);
    }
  }

-  static BTL_DONT_INLINE  void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
-    int N=A_stl.size();
+  static BTL_DONT_INLINE void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
+    int N = A_stl.size();

-    for (int j=0;j<N;j++){
+    for (int j = 0; j < N; j++) {
      A_stl[j].resize(N);
-      for (int i=0;i<N;i++){
-        A_stl[j][i] = A.coeff(i,j);
+      for (int i = 0; i < N; i++) {
+        A_stl[j][i] = A.coeff(i, j);
      }
    }
  }

-  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    X = (A*B).lazy();
+  static inline void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X, int N) {
+    X = (A * B).lazy();
  }

-  static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    X = (A.transpose()*B.transpose()).lazy();
+  static inline void transposed_matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X,
+                                                      int N) {
+    X = (A.transpose() * B.transpose()).lazy();
  }

-  static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
-    X = (A.transpose()*A).lazy();
+  static inline void ata_product(const gene_matrix& A, gene_matrix& X, int N) { X = (A.transpose() * A).lazy(); }
+
+  static inline void aat_product(const gene_matrix& A, gene_matrix& X, int N) { X = (A * A.transpose()).lazy(); }
+
+  static inline void matrix_vector_product(const gene_matrix& A, const gene_vector& B, gene_vector& X, int N) {
+    X = (A * B) /*.lazy()*/;
  }

-  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
-    X = (A*A.transpose()).lazy();
+  static inline void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    X = (A.transpose() * B) /*.lazy()*/;
  }

-  static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){
-    X = (A*B)/*.lazy()*/;
-  }
+  static inline void axpy(real coef, const gene_vector& X, gene_vector& Y, int N) { Y += coef * X; }

-  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    X = (A.transpose()*B)/*.lazy()*/;
-  }
+  static inline void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int N) { Y = a * X + b * Y; }

-  static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
-    Y += coef * X;
-  }
+  static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) { cible = source; }

-  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
-    Y = a*X + b*Y;
-  }
+  static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) { cible = source; }

-  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
-    cible = source;
-  }
-
-  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
-    cible = source;
-  }
-
-  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int N){
+  static inline void trisolve_lower(const gene_matrix& L, const gene_vector& B, gene_vector& X, int N) {
    X = L.template marked<LowerTriangular>().solveTriangular(B);
  }

-  static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){
+  static inline void trisolve_lower_matrix(const gene_matrix& L, const gene_matrix& B, gene_matrix& X, int N) {
    X = L.template marked<LowerTriangular>().solveTriangular(B);
  }

-  static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
+  static inline void cholesky(const gene_matrix& X, gene_matrix& C, int N) {
    C = X.llt().matrixL();
-//     C = X;
-//     Cholesky<gene_matrix>::computeInPlace(C);
-//     Cholesky<gene_matrix>::computeInPlaceBlock(C);
+    //     C = X;
+    //     Cholesky<gene_matrix>::computeInPlace(C);
+    //     Cholesky<gene_matrix>::computeInPlaceBlock(C);
  }

-  static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
+  static inline void lu_decomp(const gene_matrix& X, gene_matrix& C, int N) {
    C = X.lu().matrixLU();
-//     C = X.inverse();
+    //     C = X.inverse();
  }

-  static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
+  static inline void tridiagonalization(const gene_matrix& X, gene_matrix& C, int N) {
    C = Tridiagonalization<gene_matrix>(X).packedMatrix();
  }

-  static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
+  static inline void hessenberg(const gene_matrix& X, gene_matrix& C, int N) {
    C = HessenbergDecomposition<gene_matrix>(X).packedMatrix();
  }
-
-
-
 };

 #endif
--- a/bench/btl/libs/eigen2/main_adv.cpp
+++ b/bench/btl/libs/eigen2/main_adv.cpp
@ -27,18 +27,15 @@

 BTL_MAIN;

-int main()
-{
-  bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_trisolve_matrix<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_lu_decomp<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//   bench<Action_partial_lu<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+int main() {
+  bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_trisolve_matrix<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_lu_decomp<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  //   bench<Action_partial_lu<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

-  bench<Action_hessenberg<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_tridiagonalization<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_hessenberg<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_tridiagonalization<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/eigen2/main_linear.cpp
+++ b/bench/btl/libs/eigen2/main_linear.cpp
@ -22,13 +22,9 @@

 BTL_MAIN;

-int main()
-{
+int main() {
+  bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
+  bench<Action_axpby<eigen2_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

-  bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  bench<Action_axpby<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  
  return 0;
 }
-
-
--- a/bench/btl/libs/eigen2/main_matmat.cpp
+++ b/bench/btl/libs/eigen2/main_matmat.cpp
@ -22,14 +22,11 @@

 BTL_MAIN;

-int main()
-{
-  bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//   bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//   bench<Action_trmm<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+int main() {
+  bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  //   bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  //   bench<Action_trmm<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/eigen2/main_vecmat.cpp
+++ b/bench/btl/libs/eigen2/main_vecmat.cpp
@ -22,15 +22,12 @@

 BTL_MAIN;

-int main()
-{
-  bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-//   bench<Action_symv<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-//   bench<Action_syr2<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-//   bench<Action_ger<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+int main() {
+  bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  //   bench<Action_symv<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+  //   bench<Action_syr2<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+  //   bench<Action_ger<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/eigen3/btl_tiny_eigen3.cpp
+++ b/bench/btl/libs/eigen3/btl_tiny_eigen3.cpp
@ -30,17 +30,13 @@

 BTL_MAIN;

-int main()
-{
-
-  bench_static<Action_axpy,eigen2_interface>();
-  bench_static<Action_matrix_matrix_product,eigen2_interface>();
-  bench_static<Action_matrix_vector_product,eigen2_interface>();
-  bench_static<Action_atv_product,eigen2_interface>();
-  bench_static<Action_cholesky,eigen2_interface>();
-  bench_static<Action_trisolve,eigen2_interface>();
+int main() {
+  bench_static<Action_axpy, eigen2_interface>();
+  bench_static<Action_matrix_matrix_product, eigen2_interface>();
+  bench_static<Action_matrix_vector_product, eigen2_interface>();
+  bench_static<Action_atv_product, eigen2_interface>();
+  bench_static<Action_cholesky, eigen2_interface>();
+  bench_static<Action_trisolve, eigen2_interface>();

  return 0;
 }
-
-
--- a/bench/btl/libs/eigen3/eigen3_interface.hh
+++ b/bench/btl/libs/eigen3/eigen3_interface.hh
@ -24,219 +24,201 @@

 using namespace Eigen;

-template<class real, int SIZE=Dynamic>
-class eigen3_interface
-{
-
-public :
-
-  enum {IsFixedSize = (SIZE!=Dynamic)};
+template <class real, int SIZE = Dynamic>
+class eigen3_interface {
+ public:
+  enum { IsFixedSize = (SIZE != Dynamic) };

  typedef real real_type;

  typedef std::vector<real> stl_vector;
  typedef std::vector<stl_vector> stl_matrix;

-  typedef Eigen::Matrix<real,SIZE,SIZE> gene_matrix;
-  typedef Eigen::Matrix<real,SIZE,1> gene_vector;
+  typedef Eigen::Matrix<real, SIZE, SIZE> gene_matrix;
+  typedef Eigen::Matrix<real, SIZE, 1> gene_vector;

-  static inline std::string name( void )
-  {
-    return EIGEN_MAKESTRING(BTL_PREFIX);
-  }
+  static inline std::string name(void) { return EIGEN_MAKESTRING(BTL_PREFIX); }

-  static void free_matrix(gene_matrix & /*A*/, int /*N*/) {}
+  static void free_matrix(gene_matrix& /*A*/, int /*N*/) {}

-  static void free_vector(gene_vector & /*B*/) {}
+  static void free_vector(gene_vector& /*B*/) {}

-  static BTL_DONT_INLINE void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
+  static BTL_DONT_INLINE void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
    A.resize(A_stl[0].size(), A_stl.size());

-    for (unsigned int j=0; j<A_stl.size() ; j++){
-      for (unsigned int i=0; i<A_stl[j].size() ; i++){
-        A.coeffRef(i,j) = A_stl[j][i];
+    for (unsigned int j = 0; j < A_stl.size(); j++) {
+      for (unsigned int i = 0; i < A_stl[j].size(); i++) {
+        A.coeffRef(i, j) = A_stl[j][i];
      }
    }
  }

-  static BTL_DONT_INLINE  void vector_from_stl(gene_vector & B, stl_vector & B_stl){
-    B.resize(B_stl.size(),1);
+  static BTL_DONT_INLINE void vector_from_stl(gene_vector& B, stl_vector& B_stl) {
+    B.resize(B_stl.size(), 1);

-    for (unsigned int i=0; i<B_stl.size() ; i++){
+    for (unsigned int i = 0; i < B_stl.size(); i++) {
      B.coeffRef(i) = B_stl[i];
    }
  }

-  static BTL_DONT_INLINE  void vector_to_stl(gene_vector & B, stl_vector & B_stl){
-    for (unsigned int i=0; i<B_stl.size() ; i++){
+  static BTL_DONT_INLINE void vector_to_stl(gene_vector& B, stl_vector& B_stl) {
+    for (unsigned int i = 0; i < B_stl.size(); i++) {
      B_stl[i] = B.coeff(i);
    }
  }

-  static BTL_DONT_INLINE  void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
-    int  N=A_stl.size();
+  static BTL_DONT_INLINE void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
+    int N = A_stl.size();

-    for (int j=0;j<N;j++){
+    for (int j = 0; j < N; j++) {
      A_stl[j].resize(N);
-      for (int i=0;i<N;i++){
-        A_stl[j][i] = A.coeff(i,j);
+      for (int i = 0; i < N; i++) {
+        A_stl[j][i] = A.coeff(i, j);
      }
    }
  }

-  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int  /*N*/){
-    X.noalias() = A*B;
+  static inline void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X, int /*N*/) {
+    X.noalias() = A * B;
  }

-  static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int  /*N*/){
-    X.noalias() = A.transpose()*B.transpose();
+  static inline void transposed_matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X,
+                                                      int /*N*/) {
+    X.noalias() = A.transpose() * B.transpose();
  }

-  static inline void ata_product(const gene_matrix & A, gene_matrix & X, int  /*N*/){
-    //X.noalias() = A.transpose()*A;
+  static inline void ata_product(const gene_matrix& A, gene_matrix& X, int /*N*/) {
+    // X.noalias() = A.transpose()*A;
    X.template triangularView<Lower>().setZero();
    X.template selfadjointView<Lower>().rankUpdate(A.transpose());
  }

-  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int  /*N*/){
+  static inline void aat_product(const gene_matrix& A, gene_matrix& X, int /*N*/) {
    X.template triangularView<Lower>().setZero();
    X.template selfadjointView<Lower>().rankUpdate(A);
  }

-  static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int  /*N*/){
-    X.noalias() = A*B;
+  static inline void matrix_vector_product(const gene_matrix& A, const gene_vector& B, gene_vector& X, int /*N*/) {
+    X.noalias() = A * B;
  }

-  static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int  /*N*/){
+  static inline void symv(const gene_matrix& A, const gene_vector& B, gene_vector& X, int /*N*/) {
    X.noalias() = (A.template selfadjointView<Lower>() * B);
-//     internal::product_selfadjoint_vector<real,0,LowerTriangularBit,false,false>(N,A.data(),N, B.data(), 1, X.data(), 1);
+    //     internal::product_selfadjoint_vector<real,0,LowerTriangularBit,false,false>(N,A.data(),N, B.data(), 1,
+    //     X.data(), 1);
  }

-  template<typename Dest, typename Src> static void triassign(Dest& dst, const Src& src)
-  {
+  template <typename Dest, typename Src>
+  static void triassign(Dest& dst, const Src& src) {
    typedef typename Dest::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type Packet;
-    const int PacketSize = sizeof(Packet)/sizeof(Scalar);
+    const int PacketSize = sizeof(Packet) / sizeof(Scalar);
    int size = dst.cols();
-    for(int j=0; j<size; j+=1)
-    {
-//       const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
-      Scalar* A0 = dst.data() + j*dst.stride();
+    for (int j = 0; j < size; j += 1) {
+      //       const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+      Scalar* A0 = dst.data() + j * dst.stride();
      int starti = j;
      int alignedEnd = starti;
-      int alignedStart = (starti) + internal::first_aligned(&A0[starti], size-starti);
-      alignedEnd = alignedStart + ((size-alignedStart)/(2*PacketSize))*(PacketSize*2);
+      int alignedStart = (starti) + internal::first_aligned(&A0[starti], size - starti);
+      alignedEnd = alignedStart + ((size - alignedStart) / (2 * PacketSize)) * (PacketSize * 2);

      // do the non-vectorizable part of the assignment
-      for (int index = starti; index<alignedStart ; ++index)
-      {
-        if(Dest::Flags&RowMajorBit)
+      for (int index = starti; index < alignedStart; ++index) {
+        if (Dest::Flags & RowMajorBit)
          dst.copyCoeff(j, index, src);
        else
          dst.copyCoeff(index, j, src);
      }

      // do the vectorizable part of the assignment
-      for (int index = alignedStart; index<alignedEnd; index+=PacketSize)
-      {
-        if(Dest::Flags&RowMajorBit)
+      for (int index = alignedStart; index < alignedEnd; index += PacketSize) {
+        if (Dest::Flags & RowMajorBit)
          dst.template copyPacket<Src, Aligned, Unaligned>(j, index, src);
        else
          dst.template copyPacket<Src, Aligned, Unaligned>(index, j, src);
      }

      // do the non-vectorizable part of the assignment
-      for (int index = alignedEnd; index<size; ++index)
-      {
-        if(Dest::Flags&RowMajorBit)
+      for (int index = alignedEnd; index < size; ++index) {
+        if (Dest::Flags & RowMajorBit)
          dst.copyCoeff(j, index, src);
        else
          dst.copyCoeff(index, j, src);
      }
-      //dst.col(j).tail(N-j) = src.col(j).tail(N-j);
+      // dst.col(j).tail(N-j) = src.col(j).tail(N-j);
    }
  }

-  static EIGEN_DONT_INLINE void syr2(gene_matrix & A,  gene_vector & X, gene_vector & Y, int  N){
-    // internal::product_selfadjoint_rank2_update<real,0,LowerTriangularBit>(N,A.data(),N, X.data(), 1, Y.data(), 1, -1);
-    for(int j=0; j<N; ++j)
-      A.col(j).tail(N-j) += X[j] * Y.tail(N-j) + Y[j] * X.tail(N-j);
+  static EIGEN_DONT_INLINE void syr2(gene_matrix& A, gene_vector& X, gene_vector& Y, int N) {
+    // internal::product_selfadjoint_rank2_update<real,0,LowerTriangularBit>(N,A.data(),N, X.data(), 1, Y.data(), 1,
+    // -1);
+    for (int j = 0; j < N; ++j) A.col(j).tail(N - j) += X[j] * Y.tail(N - j) + Y[j] * X.tail(N - j);
  }

-  static EIGEN_DONT_INLINE void ger(gene_matrix & A,  gene_vector & X, gene_vector & Y, int  N){
-    for(int j=0; j<N; ++j)
-      A.col(j) += X * Y[j];
+  static EIGEN_DONT_INLINE void ger(gene_matrix& A, gene_vector& X, gene_vector& Y, int N) {
+    for (int j = 0; j < N; ++j) A.col(j) += X * Y[j];
  }

-  static EIGEN_DONT_INLINE void rot(gene_vector & A,  gene_vector & B, real c, real s, int  /*N*/){
-    internal::apply_rotation_in_the_plane(A, B, JacobiRotation<real>(c,s));
+  static EIGEN_DONT_INLINE void rot(gene_vector& A, gene_vector& B, real c, real s, int /*N*/) {
+    internal::apply_rotation_in_the_plane(A, B, JacobiRotation<real>(c, s));
  }

-  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int  /*N*/){
-    X.noalias() = (A.transpose()*B);
+  static inline void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int /*N*/) {
+    X.noalias() = (A.transpose() * B);
  }

-  static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int  /*N*/){
-    Y += coef * X;
-  }
+  static inline void axpy(real coef, const gene_vector& X, gene_vector& Y, int /*N*/) { Y += coef * X; }

-  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int  /*N*/){
-    Y = a*X + b*Y;
-  }
+  static inline void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int /*N*/) { Y = a * X + b * Y; }

-  static EIGEN_DONT_INLINE void copy_matrix(const gene_matrix & source, gene_matrix & cible, int  /*N*/){
+  static EIGEN_DONT_INLINE void copy_matrix(const gene_matrix& source, gene_matrix& cible, int /*N*/) {
    cible = source;
  }

-  static EIGEN_DONT_INLINE void copy_vector(const gene_vector & source, gene_vector & cible, int  /*N*/){
+  static EIGEN_DONT_INLINE void copy_vector(const gene_vector& source, gene_vector& cible, int /*N*/) {
    cible = source;
  }

-  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int  /*N*/){
+  static inline void trisolve_lower(const gene_matrix& L, const gene_vector& B, gene_vector& X, int /*N*/) {
    X = L.template triangularView<Lower>().solve(B);
  }

-  static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int  /*N*/){
+  static inline void trisolve_lower_matrix(const gene_matrix& L, const gene_matrix& B, gene_matrix& X, int /*N*/) {
    X = L.template triangularView<Upper>().solve(B);
  }

-  static inline void trmm(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int  /*N*/){
+  static inline void trmm(const gene_matrix& L, const gene_matrix& B, gene_matrix& X, int /*N*/) {
    X.noalias() = L.template triangularView<Lower>() * B;
  }

-  static inline void cholesky(const gene_matrix & X, gene_matrix & C, int  /*N*/){
+  static inline void cholesky(const gene_matrix& X, gene_matrix& C, int /*N*/) {
    C = X;
-    internal::llt_inplace<real,Lower>::blocked(C);
-    //C = X.llt().matrixL();
-//     C = X;
-//     Cholesky<gene_matrix>::computeInPlace(C);
-//     Cholesky<gene_matrix>::computeInPlaceBlock(C);
+    internal::llt_inplace<real, Lower>::blocked(C);
+    // C = X.llt().matrixL();
+    //     C = X;
+    //     Cholesky<gene_matrix>::computeInPlace(C);
+    //     Cholesky<gene_matrix>::computeInPlaceBlock(C);
  }

-  static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int  /*N*/){
-    C = X.fullPivLu().matrixLU();
-  }
+  static inline void lu_decomp(const gene_matrix& X, gene_matrix& C, int /*N*/) { C = X.fullPivLu().matrixLU(); }

-  static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int  N){
-    Matrix<DenseIndex,1,Dynamic> piv(N);
+  static inline void partial_lu_decomp(const gene_matrix& X, gene_matrix& C, int N) {
+    Matrix<DenseIndex, 1, Dynamic> piv(N);
    DenseIndex nb;
    C = X;
-    internal::partial_lu_inplace(C,piv,nb);
-//     C = X.partialPivLu().matrixLU();
+    internal::partial_lu_inplace(C, piv, nb);
+    //     C = X.partialPivLu().matrixLU();
  }

-  static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int  N){
-    typename Tridiagonalization<gene_matrix>::CoeffVectorType aux(N-1);
+  static inline void tridiagonalization(const gene_matrix& X, gene_matrix& C, int N) {
+    typename Tridiagonalization<gene_matrix>::CoeffVectorType aux(N - 1);
    C = X;
    internal::tridiagonalization_inplace(C, aux);
  }

-  static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int  /*N*/){
+  static inline void hessenberg(const gene_matrix& X, gene_matrix& C, int /*N*/) {
    C = HessenbergDecomposition<gene_matrix>(X).packedMatrix();
  }
-
-
-
 };

 #endif
--- a/bench/btl/libs/eigen3/main_adv.cpp
+++ b/bench/btl/libs/eigen3/main_adv.cpp
@ -27,18 +27,15 @@

 BTL_MAIN;

-int main()
-{
-  bench<Action_trisolve<eigen3_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-  bench<Action_trisolve_matrix<eigen3_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-  bench<Action_cholesky<eigen3_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-//   bench<Action_lu_decomp<eigen3_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-  bench<Action_partial_lu<eigen3_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
+int main() {
+  bench<Action_trisolve<eigen3_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);
+  bench<Action_trisolve_matrix<eigen3_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);
+  bench<Action_cholesky<eigen3_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);
+  //   bench<Action_lu_decomp<eigen3_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
+  bench<Action_partial_lu<eigen3_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);

-//   bench<Action_hessenberg<eigen3_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-  bench<Action_tridiagonalization<eigen3_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
+  //   bench<Action_hessenberg<eigen3_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
+  bench<Action_tridiagonalization<eigen3_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/eigen3/main_linear.cpp
+++ b/bench/btl/libs/eigen3/main_linear.cpp
@ -22,14 +22,10 @@

 BTL_MAIN;

-int main()
-{
+int main() {
+  bench<Action_axpy<eigen3_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
+  bench<Action_axpby<eigen3_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
+  bench<Action_rot<eigen3_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

-  bench<Action_axpy<eigen3_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  bench<Action_axpby<eigen3_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  bench<Action_rot<eigen3_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  
  return 0;
 }
-
-
--- a/bench/btl/libs/eigen3/main_matmat.cpp
+++ b/bench/btl/libs/eigen3/main_matmat.cpp
@ -22,14 +22,11 @@

 BTL_MAIN;

-int main()
-{
-  bench<Action_matrix_matrix_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_aat_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_trmm<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+int main() {
+  bench<Action_matrix_matrix_product<eigen3_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_aat_product<eigen3_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_trmm<eigen3_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/eigen3/main_vecmat.cpp
+++ b/bench/btl/libs/eigen3/main_vecmat.cpp
@ -22,15 +22,12 @@

 BTL_MAIN;

-int main()
-{
-  bench<Action_matrix_vector_product<eigen3_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_atv_product<eigen3_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_symv<eigen3_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_syr2<eigen3_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_ger<eigen3_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+int main() {
+  bench<Action_matrix_vector_product<eigen3_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_atv_product<eigen3_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_symv<eigen3_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_syr2<eigen3_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_ger<eigen3_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/gmm/gmm_LU_solve_interface.hh
+++ b/bench/btl/libs/gmm/gmm_LU_solve_interface.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  blitz_LU_solve_interface.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:31 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef BLITZ_LU_SOLVE_INTERFACE_HH
 #define BLITZ_LU_SOLVE_INTERFACE_HH

@ -25,168 +25,136 @@

 BZ_USING_NAMESPACE(blitz)

-template<class real>
-class blitz_LU_solve_interface : public blitz_interface<real>
-{
-
-public :
-
+template <class real>
+class blitz_LU_solve_interface : public blitz_interface<real> {
+ public:
  typedef typename blitz_interface<real>::gene_matrix gene_matrix;
  typedef typename blitz_interface<real>::gene_vector gene_vector;

-  typedef blitz::Array<int,1> Pivot_Vector;
+  typedef blitz::Array<int, 1> Pivot_Vector;

-  inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N)
-  {
+  inline static void new_Pivot_Vector(Pivot_Vector &pivot, int N) { pivot.resize(N); }

-    pivot.resize(N);
+  inline static void free_Pivot_Vector(Pivot_Vector &pivot) { return; }

-  }
+  static inline real matrix_vector_product_sliced(const gene_matrix &A, gene_vector B, int row, int col_start,
+                                                  int col_end) {
+    real somme = 0.;

-  inline static void free_Pivot_Vector(Pivot_Vector & pivot)
-  {
-    
-    return;
-
-  }
-
-
-  static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end)
-  {
-    
-    real somme=0.;
-    
-    for (int j=col_start ; j<col_end+1 ; j++){
-	
-	somme+=A(row,j)*B(j);
-	
+    for (int j = col_start; j < col_end + 1; j++) {
+      somme += A(row, j) * B(j);
    }

    return somme;
-
  }

+  static inline real matrix_matrix_product_sliced(gene_matrix &A, int row, int col_start, int col_end, gene_matrix &B,
+                                                  int row_shift, int col) {
+    real somme = 0.;

-
-
-  static inline real matrix_matrix_product_sliced(gene_matrix & A, int row, int col_start, int col_end, gene_matrix & B, int row_shift, int col )
-  {
-    
-    real somme=0.;
-    
-    for (int j=col_start ; j<col_end+1 ; j++){
-	
-	somme+=A(row,j)*B(j+row_shift,col);
-	
+    for (int j = col_start; j < col_end + 1; j++) {
+      somme += A(row, j) * B(j + row_shift, col);
    }

    return somme;
-
  }

-  inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
-  {
-
-    ASSERT( LU.rows()==LU.cols() ) ;
-    int index_max = 0 ;
-    real big = 0. ;
-    real theSum = 0. ;
-    real dum = 0. ;
+  inline static void LU_factor(gene_matrix &LU, Pivot_Vector &pivot, int N) {
+    ASSERT(LU.rows() == LU.cols());
+    int index_max = 0;
+    real big = 0.;
+    real theSum = 0.;
+    real dum = 0.;
    // Get the implicit scaling information :
-    gene_vector ImplicitScaling( N ) ;
-    for( int i=0; i<N; i++ ) {
-      big = 0. ;
-      for( int j=0; j<N; j++ ) {
-	if( abs( LU( i, j ) )>=big ) big = abs( LU( i, j ) ) ;
+    gene_vector ImplicitScaling(N);
+    for (int i = 0; i < N; i++) {
+      big = 0.;
+      for (int j = 0; j < N; j++) {
+        if (abs(LU(i, j)) >= big) big = abs(LU(i, j));
      }
-      if( big==0. ) {
-	INFOS( "blitz_LU_factor::Singular matrix" ) ;
-	exit( 0 ) ;
+      if (big == 0.) {
+        INFOS("blitz_LU_factor::Singular matrix");
+        exit(0);
      }
-      ImplicitScaling( i ) = 1./big ;
+      ImplicitScaling(i) = 1. / big;
    }
    // Loop over columns of Crout's method :
-    for( int j=0; j<N; j++ ) {
-      for( int i=0; i<j; i++ ) {
-	theSum = LU( i, j ) ;
-	theSum -= matrix_matrix_product_sliced(LU, i, 0, i-1, LU, 0, j) ;
-	//	theSum -= sum( LU( i, Range( fromStart, i-1 ) )*LU( Range( fromStart, i-1 ), j ) ) ;
-	LU( i, j ) = theSum ;
+    for (int j = 0; j < N; j++) {
+      for (int i = 0; i < j; i++) {
+        theSum = LU(i, j);
+        theSum -= matrix_matrix_product_sliced(LU, i, 0, i - 1, LU, 0, j);
+        //	theSum -= sum( LU( i, Range( fromStart, i-1 ) )*LU( Range( fromStart, i-1 ), j ) ) ;
+        LU(i, j) = theSum;
      }
-      
+
      // Search for the largest pivot element :
-      big = 0. ;
-      for( int i=j; i<N; i++ ) {
-	theSum = LU( i, j ) ;
-	theSum -= matrix_matrix_product_sliced(LU, i, 0, j-1, LU, 0, j) ;
-	//	theSum -= sum( LU( i, Range( fromStart, j-1 ) )*LU( Range( fromStart, j-1 ), j ) ) ;
-	LU( i, j ) = theSum ;
-	if( (ImplicitScaling( i )*abs( theSum ))>=big ) {
-	  dum = ImplicitScaling( i )*abs( theSum ) ;
-	  big = dum ;
-	  index_max = i ;
-	}
+      big = 0.;
+      for (int i = j; i < N; i++) {
+        theSum = LU(i, j);
+        theSum -= matrix_matrix_product_sliced(LU, i, 0, j - 1, LU, 0, j);
+        //	theSum -= sum( LU( i, Range( fromStart, j-1 ) )*LU( Range( fromStart, j-1 ), j ) ) ;
+        LU(i, j) = theSum;
+        if ((ImplicitScaling(i) * abs(theSum)) >= big) {
+          dum = ImplicitScaling(i) * abs(theSum);
+          big = dum;
+          index_max = i;
+        }
      }
      // Interchanging rows and the scale factor :
-      if( j!=index_max ) {
-	for( int k=0; k<N; k++ ) {
-	  dum = LU( index_max, k ) ;
-	  LU( index_max, k ) = LU( j, k ) ;
-	  LU( j, k ) = dum ;
-	}
-	ImplicitScaling( index_max ) = ImplicitScaling( j ) ;
+      if (j != index_max) {
+        for (int k = 0; k < N; k++) {
+          dum = LU(index_max, k);
+          LU(index_max, k) = LU(j, k);
+          LU(j, k) = dum;
+        }
+        ImplicitScaling(index_max) = ImplicitScaling(j);
      }
-      pivot( j ) = index_max ;
-      if ( LU( j, j )==0. ) LU( j, j ) = 1.e-20 ;
+      pivot(j) = index_max;
+      if (LU(j, j) == 0.) LU(j, j) = 1.e-20;
      // Divide by the pivot element :
-      if( j<N ) {
-	dum = 1./LU( j, j ) ;
-	for( int i=j+1; i<N; i++ ) LU( i, j ) *= dum ;
+      if (j < N) {
+        dum = 1. / LU(j, j);
+        for (int i = j + 1; i < N; i++) LU(i, j) *= dum;
      }
    }
-
  }

-  inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N)
-  {
-
+  inline static void LU_solve(const gene_matrix &LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N) {
    // Pour conserver le meme header, on travaille sur X, copie du second-membre B
-    X = B.copy() ;
-    ASSERT( LU.rows()==LU.cols() ) ;
-    firstIndex indI ;
+    X = B.copy();
+    ASSERT(LU.rows() == LU.cols());
+    firstIndex indI;
    // Forward substitution :
-    int ii = 0 ;
-    real theSum = 0. ;
-    for( int i=0; i<N; i++ ) {
-      int ip = pivot( i ) ;
-      theSum = X( ip ) ;
+    int ii = 0;
+    real theSum = 0.;
+    for (int i = 0; i < N; i++) {
+      int ip = pivot(i);
+      theSum = X(ip);
      //      theSum = B( ip ) ;
-      X( ip ) = X( i ) ;
+      X(ip) = X(i);
      //      B( ip ) = B( i ) ;
-      if( ii ) {
-	theSum -= matrix_vector_product_sliced(LU, X, i, ii-1, i-1) ;
-	//	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*X( Range( ii-1, i-1 ) ) ) ;
-	//	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*B( Range( ii-1, i-1 ) ) ) ;
-      } else if( theSum ) {
-	ii = i+1 ;
+      if (ii) {
+        theSum -= matrix_vector_product_sliced(LU, X, i, ii - 1, i - 1);
+        //	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*X( Range( ii-1, i-1 ) ) ) ;
+        //	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*B( Range( ii-1, i-1 ) ) ) ;
+      } else if (theSum) {
+        ii = i + 1;
      }
-      X( i ) = theSum ;
+      X(i) = theSum;
      //      B( i ) = theSum ;
    }
    // Backsubstitution :
-    for( int i=N-1; i>=0; i-- ) {
-      theSum = X( i ) ;
+    for (int i = N - 1; i >= 0; i--) {
+      theSum = X(i);
      //      theSum = B( i ) ;
-      theSum -= matrix_vector_product_sliced(LU, X, i, i+1, N) ;
+      theSum -= matrix_vector_product_sliced(LU, X, i, i + 1, N);
      //      theSum -= sum( LU( i, Range( i+1, toEnd ) )*X( Range( i+1, toEnd ) ) ) ;
      //      theSum -= sum( LU( i, Range( i+1, toEnd ) )*B( Range( i+1, toEnd ) ) ) ;
      // Store a component of the solution vector :
-      X( i ) = theSum/LU( i, i ) ;
+      X(i) = theSum / LU(i, i);
      //      B( i ) = theSum/LU( i, i ) ;
    }
-
  }
-
 };

 #endif
--- a/bench/btl/libs/gmm/gmm_interface.hh
+++ b/bench/btl/libs/gmm/gmm_interface.hh
@ -23,122 +23,101 @@

 using namespace gmm;

-template<class real>
+template <class real>
 class gmm_interface {
+ public:
+  typedef real real_type;

-public :
-
-  typedef real real_type ;
-
-  typedef std::vector<real>  stl_vector;
-  typedef std::vector<stl_vector > stl_matrix;
+  typedef std::vector<real> stl_vector;
+  typedef std::vector<stl_vector> stl_matrix;

  typedef gmm::dense_matrix<real> gene_matrix;
  typedef stl_vector gene_vector;

-  static inline std::string name( void )
-  {
-    return "gmm";
-  }
+  static inline std::string name(void) { return "gmm"; }

-  static void free_matrix(gene_matrix & A, int N){
-    return ;
-  }
+  static void free_matrix(gene_matrix& A, int N) { return; }

-  static void free_vector(gene_vector & B){
-    return ;
-  }
+  static void free_vector(gene_vector& B) { return; }

-  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
-    A.resize(A_stl[0].size(),A_stl.size());
+  static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
+    A.resize(A_stl[0].size(), A_stl.size());

-    for (int j=0; j<A_stl.size() ; j++){
-      for (int i=0; i<A_stl[j].size() ; i++){
-        A(i,j) = A_stl[j][i];
+    for (int j = 0; j < A_stl.size(); j++) {
+      for (int i = 0; i < A_stl[j].size(); i++) {
+        A(i, j) = A_stl[j][i];
      }
    }
  }

-  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
-    B = B_stl;
-  }
+  static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) { B = B_stl; }

-  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
-    B_stl = B;
-  }
+  static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) { B_stl = B; }

-  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
-    int N=A_stl.size();
+  static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
+    int N = A_stl.size();

-    for (int j=0;j<N;j++){
+    for (int j = 0; j < N; j++) {
      A_stl[j].resize(N);
-      for (int i=0;i<N;i++){
-        A_stl[j][i] = A(i,j);
+      for (int i = 0; i < N; i++) {
+        A_stl[j][i] = A(i, j);
      }
    }
  }

-  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    gmm::mult(A,B, X);
+  static inline void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X, int N) {
+    gmm::mult(A, B, X);
  }

-  static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    gmm::mult(gmm::transposed(A),gmm::transposed(B), X);
+  static inline void transposed_matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X,
+                                                      int N) {
+    gmm::mult(gmm::transposed(A), gmm::transposed(B), X);
  }

-  static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
-    gmm::mult(gmm::transposed(A),A, X);
+  static inline void ata_product(const gene_matrix& A, gene_matrix& X, int N) { gmm::mult(gmm::transposed(A), A, X); }
+
+  static inline void aat_product(const gene_matrix& A, gene_matrix& X, int N) { gmm::mult(A, gmm::transposed(A), X); }
+
+  static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    gmm::mult(A, B, X);
  }

-  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
-    gmm::mult(A,gmm::transposed(A), X);
+  static inline void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
+    gmm::mult(gmm::transposed(A), B, X);
  }

-  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    gmm::mult(A,B,X);
+  static inline void axpy(const real coef, const gene_vector& X, gene_vector& Y, int N) {
+    gmm::add(gmm::scaled(X, coef), Y);
  }

-  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    gmm::mult(gmm::transposed(A),B,X);
+  static inline void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int N) {
+    gmm::add(gmm::scaled(X, a), gmm::scaled(Y, b), Y);
  }

-  static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
-    gmm::add(gmm::scaled(X,coef), Y);
-  }
+  static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) { gmm::copy(source, cible); }

-  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
-    gmm::add(gmm::scaled(X,a), gmm::scaled(Y,b), Y);
-  }
+  static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) { gmm::copy(source, cible); }

-  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
-    gmm::copy(source,cible);
-  }
-
-  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
-    gmm::copy(source,cible);
-  }
-
-  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
-    gmm::copy(B,X);
+  static inline void trisolve_lower(const gene_matrix& L, const gene_vector& B, gene_vector& X, int N) {
+    gmm::copy(B, X);
    gmm::lower_tri_solve(L, X, false);
  }

-  static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
-    gmm::copy(X,R);
+  static inline void partial_lu_decomp(const gene_matrix& X, gene_matrix& R, int N) {
+    gmm::copy(X, R);
    std::vector<int> ipvt(N);
    gmm::lu_factor(R, ipvt);
  }

-  static inline void hessenberg(const gene_matrix & X, gene_matrix & R, int N){
-    gmm::copy(X,R);
-    gmm::Hessenberg_reduction(R,X,false);
+  static inline void hessenberg(const gene_matrix& X, gene_matrix& R, int N) {
+    gmm::copy(X, R);
+    gmm::Hessenberg_reduction(R, X, false);
  }

-  static inline void tridiagonalization(const gene_matrix & X, gene_matrix & R, int N){
-    gmm::copy(X,R);
-    gmm::Householder_tridiagonalization(R,X,false);
+  static inline void tridiagonalization(const gene_matrix& X, gene_matrix& R, int N) {
+    gmm::copy(X, R);
+    gmm::Householder_tridiagonalization(R, X, false);
  }
-
 };

 #endif
--- a/bench/btl/libs/gmm/main.cpp
+++ b/bench/btl/libs/gmm/main.cpp
@ -24,28 +24,24 @@

 BTL_MAIN;

-int main()
-{
+int main() {
+  bench<Action_axpy<gmm_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
+  bench<Action_axpby<gmm_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

-  bench<Action_axpy<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  bench<Action_axpby<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  bench<Action_matrix_vector_product<gmm_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_atv_product<gmm_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);

-  bench<Action_matrix_vector_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_atv_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+  bench<Action_matrix_matrix_product<gmm_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  //   bench<Action_ata_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  //   bench<Action_aat_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

-  bench<Action_matrix_matrix_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//   bench<Action_ata_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//   bench<Action_aat_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_trisolve<gmm_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  // bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);

-  bench<Action_trisolve<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  //bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
+  bench<Action_partial_lu<gmm_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

-  bench<Action_partial_lu<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  
-  bench<Action_hessenberg<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-  bench<Action_tridiagonalization<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_hessenberg<gmm_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  bench<Action_tridiagonalization<gmm_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/mtl4/main.cpp
+++ b/bench/btl/libs/mtl4/main.cpp
@ -24,23 +24,19 @@

 BTL_MAIN;

-int main()
-{
+int main() {
+  bench<Action_axpy<mtl4_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
+  bench<Action_axpby<mtl4_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

-  bench<Action_axpy<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  bench<Action_axpby<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  bench<Action_matrix_vector_product<mtl4_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_atv_product<mtl4_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
+  bench<Action_matrix_matrix_product<mtl4_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  //   bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  //   bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

-  bench<Action_matrix_vector_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_atv_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-  bench<Action_matrix_matrix_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//   bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//   bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-
-  bench<Action_trisolve<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//   bench<Action_cholesky<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//   bench<Action_lu_decomp<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_trisolve<mtl4_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
+  //   bench<Action_cholesky<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  //   bench<Action_lu_decomp<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

  return 0;
 }
-
-
--- a/bench/btl/libs/mtl4/mtl4_LU_solve_interface.hh
+++ b/bench/btl/libs/mtl4/mtl4_LU_solve_interface.hh
@ -1,14 +1,14 @@
 //=====================================================
 // File   :  blitz_LU_solve_interface.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:31 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef BLITZ_LU_SOLVE_INTERFACE_HH
 #define BLITZ_LU_SOLVE_INTERFACE_HH

@ -25,168 +25,136 @@

 BZ_USING_NAMESPACE(blitz)

-template<class real>
-class blitz_LU_solve_interface : public blitz_interface<real>
-{
-
-public :
-
+template <class real>
+class blitz_LU_solve_interface : public blitz_interface<real> {
+ public:
  typedef typename blitz_interface<real>::gene_matrix gene_matrix;
  typedef typename blitz_interface<real>::gene_vector gene_vector;

-  typedef blitz::Array<int,1> Pivot_Vector;
+  typedef blitz::Array<int, 1> Pivot_Vector;

-  inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N)
-  {
+  inline static void new_Pivot_Vector(Pivot_Vector &pivot, int N) { pivot.resize(N); }

-    pivot.resize(N);
+  inline static void free_Pivot_Vector(Pivot_Vector &pivot) { return; }

-  }
+  static inline real matrix_vector_product_sliced(const gene_matrix &A, gene_vector B, int row, int col_start,
+                                                  int col_end) {
+    real somme = 0.;

-  inline static void free_Pivot_Vector(Pivot_Vector & pivot)
-  {
-    
-    return;
-
-  }
-
-
-  static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end)
-  {
-    
-    real somme=0.;
-    
-    for (int j=col_start ; j<col_end+1 ; j++){
-	
-	somme+=A(row,j)*B(j);
-	
+    for (int j = col_start; j < col_end + 1; j++) {
+      somme += A(row, j) * B(j);
    }

    return somme;
-
  }

+  static inline real matrix_matrix_product_sliced(gene_matrix &A, int row, int col_start, int col_end, gene_matrix &B,
+                                                  int row_shift, int col) {
+    real somme = 0.;

-
-
-  static inline real matrix_matrix_product_sliced(gene_matrix & A, int row, int col_start, int col_end, gene_matrix & B, int row_shift, int col )
-  {
-    
-    real somme=0.;
-    
-    for (int j=col_start ; j<col_end+1 ; j++){
-	
-	somme+=A(row,j)*B(j+row_shift,col);
-	
+    for (int j = col_start; j < col_end + 1; j++) {
+      somme += A(row, j) * B(j + row_shift, col);
    }

    return somme;
-
  }

-  inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
-  {
-
-    ASSERT( LU.rows()==LU.cols() ) ;
-    int index_max = 0 ;
-    real big = 0. ;
-    real theSum = 0. ;
-    real dum = 0. ;
+  inline static void LU_factor(gene_matrix &LU, Pivot_Vector &pivot, int N) {
+    ASSERT(LU.rows() == LU.cols());
+    int index_max = 0;
+    real big = 0.;
+    real theSum = 0.;
+    real dum = 0.;
    // Get the implicit scaling information :
-    gene_vector ImplicitScaling( N ) ;
-    for( int i=0; i<N; i++ ) {
-      big = 0. ;
-      for( int j=0; j<N; j++ ) {
-	if( abs( LU( i, j ) )>=big ) big = abs( LU( i, j ) ) ;
+    gene_vector ImplicitScaling(N);
+    for (int i = 0; i < N; i++) {
+      big = 0.;
+      for (int j = 0; j < N; j++) {
+        if (abs(LU(i, j)) >= big) big = abs(LU(i, j));
      }
-      if( big==0. ) {
-	INFOS( "blitz_LU_factor::Singular matrix" ) ;
-	exit( 0 ) ;
+      if (big == 0.) {
+        INFOS("blitz_LU_factor::Singular matrix");
+        exit(0);
      }
-      ImplicitScaling( i ) = 1./big ;
+      ImplicitScaling(i) = 1. / big;
    }
    // Loop over columns of Crout's method :
-    for( int j=0; j<N; j++ ) {
-      for( int i=0; i<j; i++ ) {
-	theSum = LU( i, j ) ;
-	theSum -= matrix_matrix_product_sliced(LU, i, 0, i-1, LU, 0, j) ;
-	//	theSum -= sum( LU( i, Range( fromStart, i-1 ) )*LU( Range( fromStart, i-1 ), j ) ) ;
-	LU( i, j ) = theSum ;
+    for (int j = 0; j < N; j++) {
+      for (int i = 0; i < j; i++) {
+        theSum = LU(i, j);
+        theSum -= matrix_matrix_product_sliced(LU, i, 0, i - 1, LU, 0, j);
+        //	theSum -= sum( LU( i, Range( fromStart, i-1 ) )*LU( Range( fromStart, i-1 ), j ) ) ;
+        LU(i, j) = theSum;
      }
-      
+
      // Search for the largest pivot element :
-      big = 0. ;
-      for( int i=j; i<N; i++ ) {
-	theSum = LU( i, j ) ;
-	theSum -= matrix_matrix_product_sliced(LU, i, 0, j-1, LU, 0, j) ;
-	//	theSum -= sum( LU( i, Range( fromStart, j-1 ) )*LU( Range( fromStart, j-1 ), j ) ) ;
-	LU( i, j ) = theSum ;
-	if( (ImplicitScaling( i )*abs( theSum ))>=big ) {
-	  dum = ImplicitScaling( i )*abs( theSum ) ;
-	  big = dum ;
-	  index_max = i ;
-	}
+      big = 0.;
+      for (int i = j; i < N; i++) {
+        theSum = LU(i, j);
+        theSum -= matrix_matrix_product_sliced(LU, i, 0, j - 1, LU, 0, j);
+        //	theSum -= sum( LU( i, Range( fromStart, j-1 ) )*LU( Range( fromStart, j-1 ), j ) ) ;
+        LU(i, j) = theSum;
+        if ((ImplicitScaling(i) * abs(theSum)) >= big) {
+          dum = ImplicitScaling(i) * abs(theSum);
+          big = dum;
+          index_max = i;
+        }
      }
      // Interchanging rows and the scale factor :
-      if( j!=index_max ) {
-	for( int k=0; k<N; k++ ) {
-	  dum = LU( index_max, k ) ;
-	  LU( index_max, k ) = LU( j, k ) ;
-	  LU( j, k ) = dum ;
-	}
-	ImplicitScaling( index_max ) = ImplicitScaling( j ) ;
+      if (j != index_max) {
+        for (int k = 0; k < N; k++) {
+          dum = LU(index_max, k);
+          LU(index_max, k) = LU(j, k);
+          LU(j, k) = dum;
+        }
+        ImplicitScaling(index_max) = ImplicitScaling(j);
      }
-      pivot( j ) = index_max ;
-      if ( LU( j, j )==0. ) LU( j, j ) = 1.e-20 ;
+      pivot(j) = index_max;
+      if (LU(j, j) == 0.) LU(j, j) = 1.e-20;
      // Divide by the pivot element :
-      if( j<N ) {
-	dum = 1./LU( j, j ) ;
-	for( int i=j+1; i<N; i++ ) LU( i, j ) *= dum ;
+      if (j < N) {
+        dum = 1. / LU(j, j);
+        for (int i = j + 1; i < N; i++) LU(i, j) *= dum;
      }
    }
-
  }

-  inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N)
-  {
-
+  inline static void LU_solve(const gene_matrix &LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N) {
    // Pour conserver le meme header, on travaille sur X, copie du second-membre B
-    X = B.copy() ;
-    ASSERT( LU.rows()==LU.cols() ) ;
-    firstIndex indI ;
+    X = B.copy();
+    ASSERT(LU.rows() == LU.cols());
+    firstIndex indI;
    // Forward substitution :
-    int ii = 0 ;
-    real theSum = 0. ;
-    for( int i=0; i<N; i++ ) {
-      int ip = pivot( i ) ;
-      theSum = X( ip ) ;
+    int ii = 0;
+    real theSum = 0.;
+    for (int i = 0; i < N; i++) {
+      int ip = pivot(i);
+      theSum = X(ip);
      //      theSum = B( ip ) ;
-      X( ip ) = X( i ) ;
+      X(ip) = X(i);
      //      B( ip ) = B( i ) ;
-      if( ii ) {
-	theSum -= matrix_vector_product_sliced(LU, X, i, ii-1, i-1) ;
-	//	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*X( Range( ii-1, i-1 ) ) ) ;
-	//	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*B( Range( ii-1, i-1 ) ) ) ;
-      } else if( theSum ) {
-	ii = i+1 ;
+      if (ii) {
+        theSum -= matrix_vector_product_sliced(LU, X, i, ii - 1, i - 1);
+        //	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*X( Range( ii-1, i-1 ) ) ) ;
+        //	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*B( Range( ii-1, i-1 ) ) ) ;
+      } else if (theSum) {
+        ii = i + 1;
      }
-      X( i ) = theSum ;
+      X(i) = theSum;
      //      B( i ) = theSum ;
    }
    // Backsubstitution :
-    for( int i=N-1; i>=0; i-- ) {
-      theSum = X( i ) ;
+    for (int i = N - 1; i >= 0; i--) {
+      theSum = X(i);
      //      theSum = B( i ) ;
-      theSum -= matrix_vector_product_sliced(LU, X, i, i+1, N) ;
+      theSum -= matrix_vector_product_sliced(LU, X, i, i + 1, N);
      //      theSum -= sum( LU( i, Range( i+1, toEnd ) )*X( Range( i+1, toEnd ) ) ) ;
      //      theSum -= sum( LU( i, Range( i+1, toEnd ) )*B( Range( i+1, toEnd ) ) ) ;
      // Store a component of the solution vector :
-      X( i ) = theSum/LU( i, i ) ;
+      X(i) = theSum / LU(i, i);
      //      B( i ) = theSum/LU( i, i ) ;
    }
-
  }
-
 };

 #endif
--- a/bench/btl/libs/mtl4/mtl4_interface.hh
+++ b/bench/btl/libs/mtl4/mtl4_interface.hh
@ -25,120 +25,100 @@

 using namespace mtl;

-template<class real>
+template <class real>
 class mtl4_interface {
+ public:
+  typedef real real_type;

-public :
-
-  typedef real real_type ;
-
-  typedef std::vector<real>  stl_vector;
-  typedef std::vector<stl_vector > stl_matrix;
+  typedef std::vector<real> stl_vector;
+  typedef std::vector<stl_vector> stl_matrix;

  typedef mtl::dense2D<real, mtl::matrix::parameters<mtl::tag::col_major> > gene_matrix;
-  typedef mtl::dense_vector<real>  gene_vector;
+  typedef mtl::dense_vector<real> gene_vector;

  static inline std::string name() { return "mtl4"; }

-  static void free_matrix(gene_matrix & A, int N){
-    return ;
-  }
+  static void free_matrix(gene_matrix& A, int N) { return; }

-  static void free_vector(gene_vector & B){
-    return ;
-  }
+  static void free_vector(gene_vector& B) { return; }

-  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
+  static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
    A.change_dim(A_stl[0].size(), A_stl.size());

-    for (int j=0; j<A_stl.size() ; j++){
-      for (int i=0; i<A_stl[j].size() ; i++){
-        A(i,j) = A_stl[j][i];
+    for (int j = 0; j < A_stl.size(); j++) {
+      for (int i = 0; i < A_stl[j].size(); i++) {
+        A(i, j) = A_stl[j][i];
      }
    }
  }

-  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
+  static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) {
    B.change_dim(B_stl.size());
-    for (int i=0; i<B_stl.size() ; i++){
+    for (int i = 0; i < B_stl.size(); i++) {
      B[i] = B_stl[i];
    }
  }

-  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
-    for (int i=0; i<B_stl.size() ; i++){
+  static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) {
+    for (int i = 0; i < B_stl.size(); i++) {
      B_stl[i] = B[i];
    }
  }

-  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
-    int N=A_stl.size();
-    for (int j=0;j<N;j++){
+  static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
+    int N = A_stl.size();
+    for (int j = 0; j < N; j++) {
      A_stl[j].resize(N);
-      for (int i=0;i<N;i++){
-        A_stl[j][i] = A(i,j);
+      for (int i = 0; i < N; i++) {
+        A_stl[j][i] = A(i, j);
      }
    }
  }

-  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    X = (A*B);
-//     morton_dense<double, doppled_64_row_mask> C(N,N);
-//     C = B;
-//     X = (A*C);
+  static inline void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X, int N) {
+    X = (A * B);
+    //     morton_dense<double, doppled_64_row_mask> C(N,N);
+    //     C = B;
+    //     X = (A*C);
  }

-  static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    X = (trans(A)*trans(B));
+  static inline void transposed_matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X,
+                                                      int N) {
+    X = (trans(A) * trans(B));
  }

-//   static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
-//     X = (trans(A)*A);
-//   }
+  //   static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
+  //     X = (trans(A)*A);
+  //   }

-  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
-    X = (A*trans(A));
-  }
+  static inline void aat_product(const gene_matrix& A, gene_matrix& X, int N) { X = (A * trans(A)); }

-  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    X = (A*B);
-  }
+  static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) { X = (A * B); }

-  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    X = (trans(A)*B);
-  }
+  static inline void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) { X = (trans(A) * B); }

-  static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
-    Y += coef * X;
-  }
+  static inline void axpy(const real coef, const gene_vector& X, gene_vector& Y, int N) { Y += coef * X; }

-  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
-    Y = a*X + b*Y;
-  }
+  static inline void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int N) { Y = a * X + b * Y; }

-//   static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
-//     C = X;
-//     recursive_cholesky(C);
-//   }
+  //   static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
+  //     C = X;
+  //     recursive_cholesky(C);
+  //   }

-//   static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
-//     R = X;
-//     std::vector<int> ipvt(N);
-//     lu_factor(R, ipvt);
-//   }
+  //   static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
+  //     R = X;
+  //     std::vector<int> ipvt(N);
+  //     lu_factor(R, ipvt);
+  //   }

-  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+  static inline void trisolve_lower(const gene_matrix& L, const gene_vector& B, gene_vector& X, int N) {
    X = lower_trisolve(L, B);
  }

-  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
-    cible = source;
-  }
-
-  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
-    cible = source;
-  }
+  static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) { cible = source; }

+  static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) { cible = source; }
 };

 #endif
--- a/bench/btl/libs/tensors/main_linear.cpp
+++ b/bench/btl/libs/tensors/main_linear.cpp
@ -14,10 +14,9 @@

 BTL_MAIN;

-int main()
-{
-  bench<Action_axpy<tensor_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-  bench<Action_axpby<tensor_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+int main() {
+  bench<Action_axpy<tensor_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
+  bench<Action_axpby<tensor_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

  return 0;
 }
--- a/bench/btl/libs/tensors/main_matmat.cpp
+++ b/bench/btl/libs/tensors/main_matmat.cpp
@ -13,9 +13,8 @@

 BTL_MAIN;

-int main()
-{
-  bench<Action_matrix_matrix_product<tensor_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+int main() {
+  bench<Action_matrix_matrix_product<tensor_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);

  return 0;
 }
--- a/Show More
+++ b/Show More