add Cholesky and eigensolver benchmark

2025-09-22 06:13:13 +08:00 · 2008-07-08 17:20:17 +00:00 · 2008-07-08 17:20:17 +00:00 · 77a622f2bb
commit 77a622f2bb
parent 6f09d3a67d
8 changed files with 384 additions and 104 deletions
--- a/bench/BenchSparseUtil.h
+++ b/bench/BenchSparseUtil.h
@ -3,8 +3,6 @@
 #include <Eigen/Sparse>
 #include <bench/BenchTimer.h>
 using namespace std;
 using namespace Eigen;
 USING_PART_OF_NAMESPACE_EIGEN
--- a/bench/BenchUtil.h
+++ b/bench/BenchUtil.h
@ -26,3 +26,43 @@ template<typename MatrixType> void initMatrix_identity(MatrixType& mat)
 {
  mat.setIdentity();
 }
 // DISABLE_SSE_EXCEPTIONS(): ORs 32832 (0x8040) into the SSE control/status
 // register MXCSR, i.e. sets bits 15 and 6 (flush-to-zero and
 // denormals-are-zero according to the Intel SDM description of MXCSR).
 // The sequence is: store MXCSR to a stack slot, OR the mask into that slot,
 // load the slot back into MXCSR.
 //
 // `aux` is written by stmxcsr, so it is declared as an output operand ("=m");
 // GCC does not allow an asm statement to modify an input-only operand.
 // `volatile` keeps the statement from being discarded now that it has an
 // output which is never read afterwards.
 //
 // When compiling with the Intel compiler the macro expands to nothing.
 #ifndef __INTEL_COMPILER
 #define DISABLE_SSE_EXCEPTIONS()  { \
  int aux; \
  asm volatile( \
  "stmxcsr   %[aux]           \n\t" \
  "orl       $32832, %[aux]   \n\t" \
  "ldmxcsr   %[aux]           \n\t" \
  : [aux] "=m" (aux)); \
 }
 #else
 #define DISABLE_SSE_EXCEPTIONS()
 #endif
 #ifdef BENCH_GMM
 #include <gmm/gmm.h>
 // Resizes dst to the dimensions of src, then copies src into it one
 // coefficient at a time (outer loop over columns, inner loop over rows).
 // src must provide rows(), cols() and coeff(i,j); dst must provide
 // resize(rows,cols) and a writable operator()(i,j).
 template <typename EigenMatrixType, typename GmmMatrixType>
 void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst)
 {
  const int nbRows = src.rows();
  const int nbCols = src.cols();
  dst.resize(nbRows, nbCols);
  for (int col = 0; col < nbCols; ++col)
  {
    for (int row = 0; row < nbRows; ++row)
    {
      dst(row, col) = src.coeff(row, col);
    }
  }
 }
 #endif
 #ifdef BENCH_GSL
 #include <gsl/gsl_matrix.h>
 #include <gsl/gsl_linalg.h>
 #include <gsl/gsl_eigen.h>
 template <typename EigenMatrixType>
 void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
 {
  for (int j=0; j<src.cols(); ++j)
    for (int i=0; i<src.rows(); ++i)
      gsl_matrix_set(*dst, i, j, src.coeff(i,j));
 }
 #endif
--- a/bench/basicbenchmark.cpp
+++ b/bench/basicbenchmark.cpp
@ -4,19 +4,7 @@
 int main(int argc, char *argv[])
 {
-  // disable floating point exceptions
+  DISABLE_SSE_EXCEPTIONS();
  // this leads to more stable bench results
  // (this is done by default by ICC)
  #ifndef __INTEL_COMPILER
  {
    int aux;
    asm(
    "stmxcsr   %[aux]           \n\t"
    "orl       $32832, %[aux]   \n\t"
    "ldmxcsr   %[aux]           \n\t"
    : : [aux] "m" (aux));
  }
  #endif
  // this is the list of matrix type and size we want to bench:
  // ((suffix) (matrix size) (number of iterations))
--- a/bench/benchCholesky.cpp
+++ b/bench/benchCholesky.cpp
@ -0,0 +1,132 @@
 // g++ -DNDEBUG -O3 -I.. benchCholesky.cpp  -o benchCholesky && ./benchCholesky
 // options:
 //  -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
 //  -DEIGEN_DONT_VECTORIZE
 //  -msse2
 //  -DREPEAT=100
 //  -DTRIES=10
 //  -DSCALAR=double
 #include <Eigen/Array>
 #include <Eigen/Cholesky>
 #include <bench/BenchUtil.h>
 using namespace Eigen;
 // Compile-time knobs, each overridable with -D on the command line.
 // REPEAT: base iteration budget; benchCholesky() scales it by the matrix size.
 #ifndef REPEAT
 #define REPEAT 10000
 #endif
 // TRIES: number of timed start/stop rounds per solver.
 #ifndef TRIES
 #define TRIES 4
 #endif
 // SCALAR: element type of the benchmarked matrices (e.g. -DSCALAR=double).
 #ifndef SCALAR
 #define SCALAR float
 #endif
 typedef SCALAR Scalar;
 // Benchmarks CholeskyWithoutSquareRoot and Cholesky on the same matrix and
 // prints one result line ("dyn"/"fixed", size, time per solver) to std::cout.
 //
 // m is only used for its dimensions and its type: the matrix that is actually
 // factorized is covMat = a * a.adjoint(), with a random matrix a of the same
 // shape as m.
 //
 // When BENCH_GSL is defined and MatrixType is dynamic-sized, the GSL Cholesky
 // decomposition is timed as well and appended to the same output line.
 template <typename MatrixType>
 __attribute__ ((noinline)) void benchCholesky(const MatrixType& m)
 {
  int rows = m.rows();
  int cols = m.cols();

  // fewer inner iterations for bigger matrices
  int repeats = (REPEAT*1000)/(rows*rows);
  // the integer division above truncates to 0 for a small REPEAT and a large
  // matrix, which would skip the timed loops and divide by zero in the report
  if (repeats < 1)
    repeats = 1;

  typedef typename MatrixType::Scalar Scalar;
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;

  MatrixType a = MatrixType::random(rows,cols);
  SquareMatrixType covMat =  a * a.adjoint();

  BenchTimer timerNoSqrt, timerSqrt;

  // one coefficient of each result is accumulated so that the decompositions
  // have an observable effect
  Scalar acc = 0;
  int r = ei_random<int>(0,covMat.rows()-1);
  int c = ei_random<int>(0,covMat.cols()-1);

  for (int t=0; t<TRIES; ++t)
  {
    timerNoSqrt.start();
    for (int k=0; k<repeats; ++k)
    {
      CholeskyWithoutSquareRoot<SquareMatrixType> cholnosqrt(covMat);
      acc += cholnosqrt.matrixL().coeff(r,c);
    }
    timerNoSqrt.stop();
  }

  for (int t=0; t<TRIES; ++t)
  {
    timerSqrt.start();
    for (int k=0; k<repeats; ++k)
    {
      Cholesky<SquareMatrixType> chol(covMat);
      acc += chol.matrixL().coeff(r,c);
    }
    timerSqrt.stop();
  }

  if (MatrixType::RowsAtCompileTime==Dynamic)
    std::cout << "dyn   ";
  else
    std::cout << "fixed ";
  // timings are rescaled from `repeats` iterations to REPEAT iterations
  std::cout << covMat.rows() << " \t"
            << (timerNoSqrt.value() * REPEAT) / repeats << "s \t"
            << (timerSqrt.value() * REPEAT) / repeats << "s";

  #ifdef BENCH_GSL
  if (MatrixType::RowsAtCompileTime==Dynamic)
  {
    timerSqrt.reset();

    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());

    eiToGsl(covMat, &gslCovMat);
    for (int t=0; t<TRIES; ++t)
    {
      timerSqrt.start();
      for (int k=0; k<repeats; ++k)
      {
        // the decomposition is done on a fresh copy of the input each time
        gsl_matrix_memcpy(gslCopy,gslCovMat);
        gsl_linalg_cholesky_decomp(gslCopy);
        acc += gsl_matrix_get(gslCopy,r,c);
      }
      timerSqrt.stop();
    }

    std::cout << " | \t"
              << timerSqrt.value() * REPEAT / repeats << "s";

    // release both GSL matrices allocated above
    gsl_matrix_free(gslCovMat);
    gsl_matrix_free(gslCopy);
  }
  #endif
  std::cout << "\n";

  // make sure the compiler does not optimize too much
  if (acc==123)
    std::cout << acc;
 }
 // Prints the table header, then runs benchCholesky() on a list of
 // dynamic-sized square matrices followed by a list of fixed-size ones.
 // The command-line arguments are not used.
 int main(int argc, char* argv[])
 {
  // 0 terminates the list
  const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};

  std::cout << "size            no sqrt         standard";
  #ifdef BENCH_GSL
  std::cout << "       GSL (standard + double + ATLAS)  ";
  #endif
  std::cout << "\n";

  // plain int index: `uint` is not a standard C++ type
  for (int i=0; dynsizes[i]>0; ++i)
    benchCholesky(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));

  benchCholesky(Matrix<Scalar,2,2>());
  benchCholesky(Matrix<Scalar,3,3>());
  benchCholesky(Matrix<Scalar,4,4>());
  benchCholesky(Matrix<Scalar,5,5>());
  benchCholesky(Matrix<Scalar,6,6>());
  benchCholesky(Matrix<Scalar,7,7>());
  benchCholesky(Matrix<Scalar,8,8>());
  benchCholesky(Matrix<Scalar,12,12>());
  benchCholesky(Matrix<Scalar,16,16>());
  return 0;
 }
--- a/bench/benchEigenSolver.cpp
+++ b/bench/benchEigenSolver.cpp
@ -0,0 +1,210 @@
 // g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp  -o benchEigenSolver && ./benchEigenSolver
 // options:
 //  -DBENCH_GMM
 //  -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
 //  -DEIGEN_DONT_VECTORIZE
 //  -msse2
 //  -DREPEAT=100
 //  -DTRIES=10
 //  -DSCALAR=double
 #include <Eigen/Array>
 #include <Eigen/QR>
 #include <bench/BenchUtil.h>
 using namespace Eigen;
 // Compile-time knobs, each overridable with -D on the command line.
 // REPEAT: base iteration budget; benchEigenSolver() scales it by the matrix size.
 #ifndef REPEAT
 #define REPEAT 1000
 #endif
 // TRIES: number of timed start/stop rounds per solver.
 #ifndef TRIES
 #define TRIES 4
 #endif
 // SCALAR: element type of the benchmarked matrices (e.g. -DSCALAR=double).
 #ifndef SCALAR
 #define SCALAR float
 #endif
 typedef SCALAR Scalar;
 // Benchmarks SelfAdjointEigenSolver and EigenSolver on the same matrix and
 // prints one result line ("dyn"/"fixed", size, time per solver) to std::cout.
 //
 // m is only used for its dimensions and its type: the matrix that is actually
 // decomposed is covMat = a * a.adjoint(), with a random matrix a of the same
 // shape as m.
 //
 // When BENCH_GMM and/or BENCH_GSL are defined and MatrixType is dynamic-sized,
 // the corresponding GMM++ / GSL solvers are timed as well and appended to the
 // same output line.
 template <typename MatrixType>
 __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
 {
  int rows = m.rows();
  int cols = m.cols();

  // fewer inner iterations for bigger matrices, but at least one.
  // sqrt() gets an explicit double: sqrt(int) is ambiguous between the
  // float/double/long double overloads of pre-C++11 <cmath>.
  int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(double(rows)))));
  // the self-adjoint solvers get four times as many iterations
  int saRepeats = stdRepeats * 4;

  typedef typename MatrixType::Scalar Scalar;
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;

  MatrixType a = MatrixType::random(rows,cols);
  SquareMatrixType covMat =  a * a.adjoint();

  BenchTimer timerSa, timerStd;

  // one coefficient of each result is accumulated so that the decompositions
  // have an observable effect
  Scalar acc = 0;
  int r = ei_random<int>(0,covMat.rows()-1);
  int c = ei_random<int>(0,covMat.cols()-1);
  {
    SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
    for (int t=0; t<TRIES; ++t)
    {
      timerSa.start();
      for (int k=0; k<saRepeats; ++k)
      {
        ei.compute(covMat);
        acc += ei.eigenvectors().coeff(r,c);
      }
      timerSa.stop();
    }
  }

  {
    EigenSolver<SquareMatrixType> ei(covMat);
    for (int t=0; t<TRIES; ++t)
    {
      timerStd.start();
      for (int k=0; k<stdRepeats; ++k)
      {
        ei.compute(covMat);
        acc += ei.eigenvectors().coeff(r,c);
      }
      timerStd.stop();
    }
  }

  if (MatrixType::RowsAtCompileTime==Dynamic)
    std::cout << "dyn   ";
  else
    std::cout << "fixed ";
  // timings are rescaled from the actual iteration counts to REPEAT iterations
  std::cout << covMat.rows() << " \t"
            << timerSa.value() * REPEAT / saRepeats << "s \t"
            << timerStd.value() * REPEAT / stdRepeats << "s";

  #ifdef BENCH_GMM
  if (MatrixType::RowsAtCompileTime==Dynamic)
  {
    timerSa.reset();
    timerStd.reset();

    gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(),covMat.cols());
    gmm::dense_matrix<Scalar> eigvect(covMat.rows(),covMat.cols());
    std::vector<Scalar> eigval(covMat.rows());
    eiToGmm(covMat, gmmCovMat);
    for (int t=0; t<TRIES; ++t)
    {
      timerSa.start();
      for (int k=0; k<saRepeats; ++k)
      {
        gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
        acc += eigvect(r,c);
      }
      timerSa.stop();
    }
    // the non-selfadjoint solver does not compute the eigen vectors
 //     for (int t=0; t<TRIES; ++t)
 //     {
 //       timerStd.start();
 //       for (int k=0; k<stdRepeats; ++k)
 //       {
 //         gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
 //         acc += eigvect(r,c);
 //       }
 //       timerStd.stop();
 //     }

    std::cout << " | \t"
              << timerSa.value() * REPEAT / saRepeats << "s"
              << /*timerStd.value() * REPEAT / stdRepeats << "s"*/ "   na   ";
  }
  #endif

  #ifdef BENCH_GSL
  if (MatrixType::RowsAtCompileTime==Dynamic)
  {
    timerSa.reset();
    timerStd.reset();

    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
    gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(),covMat.cols());
    gsl_vector* eigval  = gsl_vector_alloc(covMat.rows());
    gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows());

    gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(),covMat.cols());
    gsl_vector_complex* eigvalz  = gsl_vector_complex_alloc(covMat.rows());
    gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());

    eiToGsl(covMat, &gslCovMat);
    for (int t=0; t<TRIES; ++t)
    {
      timerSa.start();
      for (int k=0; k<saRepeats; ++k)
      {
        // each solve runs on a fresh copy of the input
        gsl_matrix_memcpy(gslCopy,gslCovMat);
        gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
        acc += gsl_matrix_get(eigvect,r,c);
      }
      timerSa.stop();
    }
    for (int t=0; t<TRIES; ++t)
    {
      timerStd.start();
      for (int k=0; k<stdRepeats; ++k)
      {
        gsl_matrix_memcpy(gslCopy,gslCovMat);
        gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
        acc += GSL_REAL(gsl_matrix_complex_get(eigvectz,r,c));
      }
      timerStd.stop();
    }

    std::cout << " | \t"
              << timerSa.value() * REPEAT / saRepeats << "s \t"
              << timerStd.value() * REPEAT / stdRepeats << "s";

    gsl_matrix_free(gslCovMat);
    // gslCopy is a gsl_matrix, so it must go through gsl_matrix_free
    gsl_matrix_free(gslCopy);
    gsl_matrix_free(eigvect);
    gsl_vector_free(eigval);
    gsl_matrix_complex_free(eigvectz);
    gsl_vector_complex_free(eigvalz);
    gsl_eigen_symmv_free(eisymm);
    gsl_eigen_nonsymmv_free(einonsymm);
  }
  #endif

  std::cout << "\n";

  // make sure the compiler does not optimize too much
  if (acc==123)
    std::cout << acc;
 }
 // Prints the table header, then runs benchEigenSolver() on a list of
 // dynamic-sized square matrices followed by a list of fixed-size ones.
 // The command-line arguments are not used.
 int main(int argc, char* argv[])
 {
  // 0 terminates the list
  const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};

  std::cout << "size            selfadjoint       generic";
  #ifdef BENCH_GMM
  std::cout << "        GMM++          ";
  #endif
  #ifdef BENCH_GSL
  std::cout << "       GSL (double + ATLAS)  ";
  #endif
  std::cout << "\n";

  // plain int index: `uint` is not a standard C++ type
  for (int i=0; dynsizes[i]>0; ++i)
    benchEigenSolver(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));

  benchEigenSolver(Matrix<Scalar,2,2>());
  benchEigenSolver(Matrix<Scalar,3,3>());
  benchEigenSolver(Matrix<Scalar,4,4>());
  benchEigenSolver(Matrix<Scalar,6,6>());
  benchEigenSolver(Matrix<Scalar,8,8>());
  benchEigenSolver(Matrix<Scalar,12,12>());
  benchEigenSolver(Matrix<Scalar,16,16>());
  return 0;
 }
--- a/bench/bench_unrolling
+++ b/bench/bench_unrolling
@ -5,7 +5,7 @@
 for ((i=1; i<16; ++i)); do
    echo "Matrix size: $i x $i :"
-    $CXX -O3 -I.. -DNDEBUG  benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=25 -o benchmark && time ./benchmark >/dev/null
+    $CXX -O3 -I.. -DNDEBUG  benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null
    $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null
    echo " "
 done
--- a/bench/ompbench.cxxlist
+++ b/bench/ompbench.cxxlist
@ -1,7 +0,0 @@
 #!/bin/bash
 CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
 # CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
 CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp"
--- a/bench/ompbenchmark.cpp
+++ b/bench/ompbenchmark.cpp
@ -1,81 +0,0 @@
 // g++ -O3 -DNDEBUG -I.. -fopenmp ompbenchmark.cpp -o ompbenchmark && ./ompbenchmark 2> /dev/null
 // icpc -fast -fno-exceptions -DNDEBUG -I.. -openmp  ompbenchmark.cpp -o ompbenchmark && ./ompbenchmark 2> /dev/null
 #include <omp.h>
 #include "BenchUtil.h"
 #include "basicbenchmark.h"
 // #include <Eigen/Core>
 // #include "BenchTimer.h"
 //
 // using namespace std;
 // USING_PART_OF_NAMESPACE_EIGEN
 //
 // enum {LazyEval, EarlyEval, OmpEval};
 //
 // template<int Mode, typename MatrixType>
 // double benchSingleProc(const MatrixType& mat, int iterations, int tries)  __attribute__((noinline));
 //
 // template<int Mode, typename MatrixType>
 // double benchBasic(const MatrixType& mat, int iterations, int tries)
 // {
 //   const int rows = mat.rows();
 //   const int cols = mat.cols();
 //
 //   Eigen::BenchTimer timer;
 //   for(uint t=0; t<tries; ++t)
 //   {
 //     MatrixType I = MatrixType::identity(rows, cols);
 //     MatrixType m = MatrixType::random(rows, cols);
 //
 //     timer.start();
 //     for(int a = 0; a < iterations; a++)
 //     {
 //       if(Mode==LazyEval)
 //         m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
 //       else if(Mode==OmpEval)
 //         m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP();
 //       else
 //         m = I + 0.00005 * (m + m * m);
 //     }
 //     timer.stop();
 //     cerr << m;
 //   }
 //   return timer.value();
 // };
 // Compares benchBasic<LazyEval> against benchBasic<OmpEval> for a fixed-size
 // 4x4 double matrix and for several dynamic-sized double matrices, printing
 // both timings to std::cout. The command-line arguments are not used.
 int main(int argc, char *argv[])
 {
  // disable floating point exceptions
  // this leads to more stable bench results
  // (the asm below stores MXCSR into aux, ORs 32832 into it and loads it back)
  {
    int aux;
    asm(
    "stmxcsr   %[aux]           \n\t"
    "orl       $32832, %[aux]   \n\t"
    "ldmxcsr   %[aux]           \n\t"
    : : [aux] "m" (aux));
  }
  // commented out since the default setting is to use as many threads as processors
  //omp_set_num_threads(omp_get_num_procs());
  std::cout << "double, fixed-size 4x4: "
    << benchBasic<LazyEval>(Matrix4d(), 10000, 10) << "s  "
    << benchBasic<OmpEval>(Matrix4d(), 10000, 10) << "s  \n";
  // BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES): runs both evaluation modes on
  // a dynamic SIZE x SIZE matrix of TYPE and prints the two timings, their
  // ratio single/omp and the value of omp_get_num_procs().
  #define BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES) {\
      double single = benchBasic<LazyEval>(Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
      double omp    = benchBasic<OmpEval> (Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
      std::cout << #TYPE << ", " << #SIZE << "x" << #SIZE << ": " << single << "s " << omp << "s " \
        << " => x" << single/omp << " (" << omp_get_num_procs() << ")" << std::endl; \
    }
  // the iteration and try counts shrink as the matrices grow
  BENCH_MATRIX(double,   32, 1000, 10);
  BENCH_MATRIX(double,  128,   10, 10);
  BENCH_MATRIX(double,  512,    1,  6);
  BENCH_MATRIX(double, 1024,    1,  4);
  return 0;
 }