diff --git a/Eigen/Core b/Eigen/Core index e52ee96e9..f2b3639ca 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -40,6 +40,7 @@ namespace Eigen { #include "src/Core/IO.h" #include "src/Core/Swap.h" #include "src/Core/CommaInitializer.h" +#include "src/Core/EvalOMP.h" } // namespace Eigen diff --git a/Eigen/src/Core/EvalOMP.h b/Eigen/src/Core/EvalOMP.h new file mode 100644 index 000000000..01f78b0c8 --- /dev/null +++ b/Eigen/src/Core/EvalOMP.h @@ -0,0 +1,118 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_EVAL_OMP_H +#define EIGEN_EVAL_OMP_H + +/** \class EvalOMP + * + * \brief Parallel evaluation of an expression using OpenMP + * + * The template parameter Expression is the type of the expression that we are evaluating. + * + * This class is the return type of MatrixBase::evalOMP() and most of the time this is the + * only way it is used. + * + * Note that if OpenMP is not enabled, then this class is equivalent to Eval. + * + * \sa MatrixBase::evalOMP(), class Eval, MatrixBase::eval() + */ +template class EvalOMP : NoOperatorEquals, + public Matrix< typename ExpressionType::Scalar, + ExpressionType::Traits::RowsAtCompileTime, + ExpressionType::Traits::ColsAtCompileTime, + EIGEN_DEFAULT_MATRIX_STORAGE_ORDER, + ExpressionType::Traits::MaxRowsAtCompileTime, + ExpressionType::Traits::MaxColsAtCompileTime> +{ + public: + typedef typename ExpressionType::Scalar Scalar; + + /** The actual matrix type to evaluate to. This type can be used independently + * of the rest of this class to get the actual matrix type to evaluate and store + * the value of an expression. + */ + typedef Matrix MatrixType; + + #ifdef _OPENMP + explicit EvalOMP(const ExpressionType& other) + : MatrixType(other.rows(), other.cols()) + { + #ifdef __INTEL_COMPILER + #pragma omp parallel default(none) shared(other) + #else + #pragma omp parallel default(none) + #endif + { + if (this->cols()>this->rows()) + { + #pragma omp for + for(int j = 0; j < this->cols(); j++) + for(int i = 0; i < this->rows(); i++) + this->coeffRef(i, j) = other.coeff(i, j); + } + else + { + #pragma omp for + for(int i = 0; i < this->rows(); i++) + for(int j = 0; j < this->cols(); j++) + this->coeffRef(i, j) = other.coeff(i, j); + } + } + } + #else + explicit EvalOMP(const ExpressionType& other) : MatrixType(other) {} + #endif +}; + +/** Evaluates *this in a parallel fashion using OpenMP and returns the obtained matrix. + * + * Of course, it only makes sense to call this function for complex expressions, and/or + * large matrices (>32x32), \b and if there is no outer loop which can be parallelized. + * + * It is the responsibility of the user manage the OpenMP parameters, for instance: + * \code + * #include + * // ... + * omp_set_num_threads(omp_get_num_procs()); + * \endcode + * You also need to enable OpenMP on your compiler (e.g., -fopenmp) during both compilation and linking. + * + * Note that if OpenMP is not enabled, then evalOMP() is equivalent to eval(). + * + * \sa class EvalOMP, eval() + */ +template +const EvalOMP MatrixBase::evalOMP() const +{ + return EvalOMP(*static_cast(this)); +} + +#endif // EIGEN_EVAL_OMP_H diff --git a/Eigen/src/Core/ForwardDeclarations.h b/Eigen/src/Core/ForwardDeclarations.h index aa9c4a052..dce88336e 100644 --- a/Eigen/src/Core/ForwardDeclarations.h +++ b/Eigen/src/Core/ForwardDeclarations.h @@ -44,6 +44,7 @@ template class DiagonalCoeffs; template class Identity; template class Map; template class Eval; +template class EvalOMP; struct ScalarProductOp; struct ScalarQuotientOp; diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index b0b5a50a3..ccec292f6 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -359,6 +359,7 @@ template class MatrixBase /// \name special functions //@{ const Eval eval() const EIGEN_ALWAYS_INLINE; + const EvalOMP evalOMP() const EIGEN_ALWAYS_INLINE; template const CwiseUnaryOp cwise(const CustomUnaryOp& func = CustomUnaryOp()) const; diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h new file mode 100644 index 000000000..e86c6ce13 --- /dev/null +++ b/bench/BenchTimer.h @@ -0,0 +1,75 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_BENCH_TIMER_H +#define EIGEN_BENCH_TIMER_H + +#include +#include +#include + +namespace Eigen +{ + +/** Elapsed time timer keeping the best try. + */ +class BenchTimer +{ +public: + + BenchTimer() : m_best(1e99) {} + + ~BenchTimer() {} + + inline void start(void) {m_start = getTime();} + inline void stop(void) + { + m_best = std::min(m_best, getTime() - m_start); + } + + /** Return the best elapsed time. + */ + inline double value(void) + { + return m_best; + } + + static inline double getTime(void) + { + struct timeval tv; + struct timezone tz; + gettimeofday(&tv, &tz); + return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec; + } + +protected: + + double m_best, m_start; + +}; + +} + +#endif // EIGEN_BENCH_TIMER_H diff --git a/bench/BenchUtil.h b/bench/BenchUtil.h new file mode 100644 index 000000000..bb3c4611c --- /dev/null +++ b/bench/BenchUtil.h @@ -0,0 +1,28 @@ + +#include +#include "BenchTimer.h" + +using namespace std; +USING_PART_OF_NAMESPACE_EIGEN + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +template void initMatrix_random(MatrixType& mat) __attribute__((noinline)); +template void initMatrix_random(MatrixType& mat) +{ + mat.setRandom();// = MatrixType::random(mat.rows(), mat.cols()); +} + +template void initMatrix_identity(MatrixType& mat) __attribute__((noinline)); +template void initMatrix_identity(MatrixType& mat) +{ + mat.setIdentity(); +} diff --git a/bench/README.txt b/bench/README.txt new file mode 100644 index 000000000..39831ae8a --- /dev/null +++ b/bench/README.txt @@ -0,0 +1,55 @@ + +This folder contains a couple of benchmark utities and Eigen benchmarks. + +**************************** +* bench_multi_compilers.sh * +**************************** + +This script allows to run a benchmark on a set of different compilers/compiler options. +It takes two arguments: + - a file defining the list of the compilers with their options + - the .cpp file of the benchmark + +Examples: + +$ ./bench_multi_compilers.sh basicbench.cxxlist basicbenchmark.cpp + + g++-4.1 -O3 -DNDEBUG -finline-limit=10000 + 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / + 0.271102 0.131416 0.422322 0.198633 + 0.201658 0.102436 0.397566 0.207282 + + g++-4.2 -O3 -DNDEBUG -finline-limit=10000 + 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / + 0.107805 0.0890579 0.30265 0.161843 + 0.127157 0.0712581 0.278341 0.191029 + + g++-4.3 -O3 -DNDEBUG -finline-limit=10000 + 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / + 0.134318 0.105291 0.3704 0.180966 + 0.137703 0.0732472 0.31225 0.202204 + + icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size + 3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 / + 0.226145 0.0941319 0.371873 0.159433 + 0.109302 0.0837538 0.328102 0.173891 + + +$ ./bench_multi_compilers.sh ompbench.cxxlist ompbenchmark.cpp + + g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp + double, fixed-size 4x4: 0.00165105s 0.0778739s + double, 32x32: 0.0654769s 0.075289s => x0.869674 (2) + double, 128x128: 0.054148s 0.0419669s => x1.29025 (2) + double, 512x512: 0.913799s 0.428533s => x2.13239 (2) + double, 1024x1024: 14.5972s 9.3542s => x1.5605 (2) + + icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp + double, fixed-size 4x4: 0.000589848s 0.019949s + double, 32x32: 0.0682781s 0.0449722s => x1.51823 (2) + double, 128x128: 0.0547509s 0.0435519s => x1.25714 (2) + double, 512x512: 0.829436s 0.424438s => x1.9542 (2) + double, 1024x1024: 14.5243s 10.7735s => x1.34815 (2) + + + diff --git a/bench/basicbench.cxxlist b/bench/basicbench.cxxlist new file mode 100644 index 000000000..93266aaf2 --- /dev/null +++ b/bench/basicbench.cxxlist @@ -0,0 +1,12 @@ +#!/bin/bash + +CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG" +CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000" + +CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG" +CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000" + +CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG" +CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000" + +CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size" \ No newline at end of file diff --git a/bench/basicbenchmark.cpp b/bench/basicbenchmark.cpp new file mode 100644 index 000000000..c44ed4514 --- /dev/null +++ b/bench/basicbenchmark.cpp @@ -0,0 +1,46 @@ + +#include "BenchUtil.h" +#include "basicbenchmark.h" + +int main(int argc, char *argv[]) +{ + // disbale floating point exceptions + // this leads to more stable bench results + // (this is done by default by ICC) + #ifndef __INTEL_COMPILER + { + int aux; + asm( + "stmxcsr %[aux] \n\t" + "orl $32832, %[aux] \n\t" + "ldmxcsr %[aux] \n\t" + : : [aux] "m" (aux)); + } + #endif + + // this is the list of matrix type and size we want to bench: + // ((suffix) (matrix size) (number of iterations)) + #define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000)) +// #define MODES ((Xd)(20)(10000)) + + #define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \ + << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \ + << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / " + + std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl; + + const int tries = 10; + + #define _RUN_BENCH(R,ARG,EL) \ + std::cout << ARG( \ + BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\ + BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \ + << " "; + + BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic, MODES ); + std::cout << endl; + BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic, MODES ); + std::cout << endl; + + return 0; +} diff --git a/bench/basicbenchmark.h b/bench/basicbenchmark.h new file mode 100644 index 000000000..60e1c0258 --- /dev/null +++ b/bench/basicbenchmark.h @@ -0,0 +1,59 @@ + +enum {LazyEval, EarlyEval, OmpEval}; + +template +void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline)); + +template +void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) +{ + for(int a = 0; a < iterations; a++) + { + if (Mode==LazyEval) + { + asm("#begin_bench_loop LazyEval"); + if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + m = (I + 0.00005 * (m + m.lazyProduct(m))).eval(); + } + else if (Mode==OmpEval) + { + asm("#begin_bench_loop OmpEval"); + if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP(); + } + else + { + asm("#begin_bench_loop EarlyEval"); + if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + m = I + 0.00005 * (m + m * m); + } + asm("#end_bench_loop"); + } +} + +template +double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline)); + +template +double benchBasic(const MatrixType& mat, int iterations, int tries) +{ + const int rows = mat.rows(); + const int cols = mat.cols(); + + MatrixType I(rows,cols); + MatrixType m(rows,cols); + + initMatrix_identity(I); + + Eigen::BenchTimer timer; + for(uint t=0; t(I, m, iterations); + timer.stop(); + cerr << m; + } + return timer.value(); +}; + diff --git a/bench/bench_multi_compilers.sh b/bench/bench_multi_compilers.sh new file mode 100755 index 000000000..ce5586fb9 --- /dev/null +++ b/bench/bench_multi_compilers.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +if (($# < 2)); then + echo "Usage: $0 compilerlist.txt benchfile.cpp" +else + +compilerlist=$1 +benchfile=$2 + +g=0 +source $compilerlist + +# for each compiler, compile benchfile and run the benchmark +for (( i=0 ; i /dev/null + echo "" + else + echo "compiler not found: $compiler" + fi +done + +fi diff --git a/doc/bench_unrolling b/bench/bench_unrolling similarity index 100% rename from doc/bench_unrolling rename to bench/bench_unrolling diff --git a/doc/benchmark.cpp b/bench/benchmark.cpp similarity index 100% rename from doc/benchmark.cpp rename to bench/benchmark.cpp diff --git a/doc/benchmarkX.cpp b/bench/benchmarkX.cpp similarity index 92% rename from doc/benchmarkX.cpp rename to bench/benchmarkX.cpp index 1c8b6b803..09173e1ed 100644 --- a/doc/benchmarkX.cpp +++ b/bench/benchmarkX.cpp @@ -13,7 +13,7 @@ int main(int argc, char *argv[]) { m(i,j) = 0.1 * (i+20*j); } - for(int a = 0; a < 1000000; a++) + for(int a = 0; a < 100000; a++) { m = I + 0.00005 * (m + m*m); } diff --git a/doc/benchmark_suite b/bench/benchmark_suite similarity index 100% rename from doc/benchmark_suite rename to bench/benchmark_suite diff --git a/bench/ompbench.cxxlist b/bench/ompbench.cxxlist new file mode 100644 index 000000000..fc6681d33 --- /dev/null +++ b/bench/ompbench.cxxlist @@ -0,0 +1,7 @@ +#!/bin/bash + +CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp" + +# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=10000 -fopenmp" + +CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp" \ No newline at end of file diff --git a/bench/ompbenchmark.cpp b/bench/ompbenchmark.cpp new file mode 100644 index 000000000..ac5155cb8 --- /dev/null +++ b/bench/ompbenchmark.cpp @@ -0,0 +1,81 @@ +// g++ -O3 -DNDEBUG -I.. -fopenmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null +// icpc -fast -fno-exceptions -DNDEBUG -I.. -openmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null + +#include +#include "BenchUtil.h" +#include "basicbenchmark.h" + +// #include +// #include "BenchTimer.h" +// +// using namespace std; +// USING_PART_OF_NAMESPACE_EIGEN +// +// enum {LazyEval, EarlyEval, OmpEval}; +// +// template +// double benchSingleProc(const MatrixType& mat, int iterations, int tries) __attribute__((noinline)); +// +// template +// double benchBasic(const MatrixType& mat, int iterations, int tries) +// { +// const int rows = mat.rows(); +// const int cols = mat.cols(); +// +// Eigen::BenchTimer timer; +// for(uint t=0; t(Matrix4d(), 10000, 10) << "s " + << benchBasic(Matrix4d(), 10000, 10) << "s \n"; + + #define BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES) {\ + double single = benchBasic(Matrix(SIZE,SIZE), ITERATIONS, TRIES); \ + double omp = benchBasic (Matrix(SIZE,SIZE), ITERATIONS, TRIES); \ + std::cout << #TYPE << ", " << #SIZE << "x" << #SIZE << ": " << single << "s " << omp << "s " \ + << " => x" << single/omp << " (" << omp_get_num_procs() << ")" << std::endl; \ + } + + BENCH_MATRIX(double, 32, 1000, 10); + BENCH_MATRIX(double, 128, 10, 10); + BENCH_MATRIX(double, 512, 1, 6); + BENCH_MATRIX(double, 1024, 1, 4); + + return 0; +} + diff --git a/doc/Mainpage.dox b/doc/Mainpage.dox index fea80519d..115cb2eeb 100644 --- a/doc/Mainpage.dox +++ b/doc/Mainpage.dox @@ -75,7 +75,7 @@ Eigen is well tested with recent versions of GCC and ICC. Both GCC 4.2 and ICC g For best performance, we recommend the following compilation flags:
    -
  • \b GCC: \c -O3 \c -DNDEBUG \c -finline-limit=10000 \c -falign-functions=64
  • +
  • \b GCC: \c -O3 \c -DNDEBUG \c -finline-limit=10000
  • \b ICC: \c -O3 \c -DNDEBUG \c -xT \c -ipo \c -no-prec-div \c -no-inline-max-size