mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-23 10:09:36 +08:00
* basic support for multicore CPU via a .evalOMP() which
internaly uses OpenMP if enabled at compile time. * added a bench/ folder with a couple benchmarks and benchmark tools.
This commit is contained in:
parent
f64311e07d
commit
9d9d81ad71
@ -40,6 +40,7 @@ namespace Eigen {
|
|||||||
#include "src/Core/IO.h"
|
#include "src/Core/IO.h"
|
||||||
#include "src/Core/Swap.h"
|
#include "src/Core/Swap.h"
|
||||||
#include "src/Core/CommaInitializer.h"
|
#include "src/Core/CommaInitializer.h"
|
||||||
|
#include "src/Core/EvalOMP.h"
|
||||||
|
|
||||||
} // namespace Eigen
|
} // namespace Eigen
|
||||||
|
|
||||||
|
118
Eigen/src/Core/EvalOMP.h
Normal file
118
Eigen/src/Core/EvalOMP.h
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra. Eigen itself is part of the KDE project.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
|
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
|
||||||
|
//
|
||||||
|
// Eigen is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU Lesser General Public
|
||||||
|
// License as published by the Free Software Foundation; either
|
||||||
|
// version 3 of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Alternatively, you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License as
|
||||||
|
// published by the Free Software Foundation; either version 2 of
|
||||||
|
// the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public
|
||||||
|
// License and a copy of the GNU General Public License along with
|
||||||
|
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#ifndef EIGEN_EVAL_OMP_H
|
||||||
|
#define EIGEN_EVAL_OMP_H
|
||||||
|
|
||||||
|
/** \class EvalOMP
|
||||||
|
*
|
||||||
|
* \brief Parallel evaluation of an expression using OpenMP
|
||||||
|
*
|
||||||
|
* The template parameter Expression is the type of the expression that we are evaluating.
|
||||||
|
*
|
||||||
|
* This class is the return type of MatrixBase::evalOMP() and most of the time this is the
|
||||||
|
* only way it is used.
|
||||||
|
*
|
||||||
|
* Note that if OpenMP is not enabled, then this class is equivalent to Eval.
|
||||||
|
*
|
||||||
|
* \sa MatrixBase::evalOMP(), class Eval, MatrixBase::eval()
|
||||||
|
*/
|
||||||
|
template<typename ExpressionType> class EvalOMP : NoOperatorEquals,
|
||||||
|
public Matrix< typename ExpressionType::Scalar,
|
||||||
|
ExpressionType::Traits::RowsAtCompileTime,
|
||||||
|
ExpressionType::Traits::ColsAtCompileTime,
|
||||||
|
EIGEN_DEFAULT_MATRIX_STORAGE_ORDER,
|
||||||
|
ExpressionType::Traits::MaxRowsAtCompileTime,
|
||||||
|
ExpressionType::Traits::MaxColsAtCompileTime>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef typename ExpressionType::Scalar Scalar;
|
||||||
|
|
||||||
|
/** The actual matrix type to evaluate to. This type can be used independently
|
||||||
|
* of the rest of this class to get the actual matrix type to evaluate and store
|
||||||
|
* the value of an expression.
|
||||||
|
*/
|
||||||
|
typedef Matrix<Scalar,
|
||||||
|
ExpressionType::Traits::RowsAtCompileTime,
|
||||||
|
ExpressionType::Traits::ColsAtCompileTime,
|
||||||
|
EIGEN_DEFAULT_MATRIX_STORAGE_ORDER,
|
||||||
|
ExpressionType::Traits::MaxRowsAtCompileTime,
|
||||||
|
ExpressionType::Traits::MaxColsAtCompileTime> MatrixType;
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
explicit EvalOMP(const ExpressionType& other)
|
||||||
|
: MatrixType(other.rows(), other.cols())
|
||||||
|
{
|
||||||
|
#ifdef __INTEL_COMPILER
|
||||||
|
#pragma omp parallel default(none) shared(other)
|
||||||
|
#else
|
||||||
|
#pragma omp parallel default(none)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (this->cols()>this->rows())
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
|
for(int j = 0; j < this->cols(); j++)
|
||||||
|
for(int i = 0; i < this->rows(); i++)
|
||||||
|
this->coeffRef(i, j) = other.coeff(i, j);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
|
for(int i = 0; i < this->rows(); i++)
|
||||||
|
for(int j = 0; j < this->cols(); j++)
|
||||||
|
this->coeffRef(i, j) = other.coeff(i, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
explicit EvalOMP(const ExpressionType& other) : MatrixType(other) {}
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Evaluates *this in a parallel fashion using OpenMP and returns the obtained matrix.
|
||||||
|
*
|
||||||
|
* Of course, it only makes sense to call this function for complex expressions, and/or
|
||||||
|
* large matrices (>32x32), \b and if there is no outer loop which can be parallelized.
|
||||||
|
*
|
||||||
|
* It is the responsibility of the user manage the OpenMP parameters, for instance:
|
||||||
|
* \code
|
||||||
|
* #include <omp.h>
|
||||||
|
* // ...
|
||||||
|
* omp_set_num_threads(omp_get_num_procs());
|
||||||
|
* \endcode
|
||||||
|
* You also need to enable OpenMP on your compiler (e.g., -fopenmp) during both compilation and linking.
|
||||||
|
*
|
||||||
|
* Note that if OpenMP is not enabled, then evalOMP() is equivalent to eval().
|
||||||
|
*
|
||||||
|
* \sa class EvalOMP, eval()
|
||||||
|
*/
|
||||||
|
template<typename Scalar, typename Derived>
|
||||||
|
const EvalOMP<Derived> MatrixBase<Scalar, Derived>::evalOMP() const
|
||||||
|
{
|
||||||
|
return EvalOMP<Derived>(*static_cast<const Derived*>(this));
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // EIGEN_EVAL_OMP_H
|
@ -44,6 +44,7 @@ template<typename MatrixType> class DiagonalCoeffs;
|
|||||||
template<typename MatrixType> class Identity;
|
template<typename MatrixType> class Identity;
|
||||||
template<typename MatrixType> class Map;
|
template<typename MatrixType> class Map;
|
||||||
template<typename Derived> class Eval;
|
template<typename Derived> class Eval;
|
||||||
|
template<typename Derived> class EvalOMP;
|
||||||
|
|
||||||
struct ScalarProductOp;
|
struct ScalarProductOp;
|
||||||
struct ScalarQuotientOp;
|
struct ScalarQuotientOp;
|
||||||
|
@ -359,6 +359,7 @@ template<typename Scalar, typename Derived> class MatrixBase
|
|||||||
/// \name special functions
|
/// \name special functions
|
||||||
//@{
|
//@{
|
||||||
const Eval<Derived> eval() const EIGEN_ALWAYS_INLINE;
|
const Eval<Derived> eval() const EIGEN_ALWAYS_INLINE;
|
||||||
|
const EvalOMP<Derived> evalOMP() const EIGEN_ALWAYS_INLINE;
|
||||||
|
|
||||||
template<typename CustomUnaryOp>
|
template<typename CustomUnaryOp>
|
||||||
const CwiseUnaryOp<CustomUnaryOp, Derived> cwise(const CustomUnaryOp& func = CustomUnaryOp()) const;
|
const CwiseUnaryOp<CustomUnaryOp, Derived> cwise(const CustomUnaryOp& func = CustomUnaryOp()) const;
|
||||||
|
75
bench/BenchTimer.h
Normal file
75
bench/BenchTimer.h
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra. Eigen itself is part of the KDE project.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
|
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
|
||||||
|
//
|
||||||
|
// Eigen is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU Lesser General Public
|
||||||
|
// License as published by the Free Software Foundation; either
|
||||||
|
// version 3 of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Alternatively, you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License as
|
||||||
|
// published by the Free Software Foundation; either version 2 of
|
||||||
|
// the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public
|
||||||
|
// License and a copy of the GNU General Public License along with
|
||||||
|
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#ifndef EIGEN_BENCH_TIMER_H
|
||||||
|
#define EIGEN_BENCH_TIMER_H
|
||||||
|
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
namespace Eigen
|
||||||
|
{
|
||||||
|
|
||||||
|
/** Elapsed time timer keeping the best try.
|
||||||
|
*/
|
||||||
|
class BenchTimer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
BenchTimer() : m_best(1e99) {}
|
||||||
|
|
||||||
|
~BenchTimer() {}
|
||||||
|
|
||||||
|
inline void start(void) {m_start = getTime();}
|
||||||
|
inline void stop(void)
|
||||||
|
{
|
||||||
|
m_best = std::min(m_best, getTime() - m_start);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the best elapsed time.
|
||||||
|
*/
|
||||||
|
inline double value(void)
|
||||||
|
{
|
||||||
|
return m_best;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline double getTime(void)
|
||||||
|
{
|
||||||
|
struct timeval tv;
|
||||||
|
struct timezone tz;
|
||||||
|
gettimeofday(&tv, &tz);
|
||||||
|
return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
double m_best, m_start;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // EIGEN_BENCH_TIMER_H
|
28
bench/BenchUtil.h
Normal file
28
bench/BenchUtil.h
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
#include <Eigen/Core>
|
||||||
|
#include "BenchTimer.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
USING_PART_OF_NAMESPACE_EIGEN
|
||||||
|
|
||||||
|
#include <boost/preprocessor/repetition/enum_params.hpp>
|
||||||
|
#include <boost/preprocessor/repetition.hpp>
|
||||||
|
#include <boost/preprocessor/seq.hpp>
|
||||||
|
#include <boost/preprocessor/array.hpp>
|
||||||
|
#include <boost/preprocessor/arithmetic.hpp>
|
||||||
|
#include <boost/preprocessor/comparison.hpp>
|
||||||
|
#include <boost/preprocessor/punctuation.hpp>
|
||||||
|
#include <boost/preprocessor/punctuation/comma.hpp>
|
||||||
|
#include <boost/preprocessor/stringize.hpp>
|
||||||
|
|
||||||
|
template<typename MatrixType> void initMatrix_random(MatrixType& mat) __attribute__((noinline));
|
||||||
|
template<typename MatrixType> void initMatrix_random(MatrixType& mat)
|
||||||
|
{
|
||||||
|
mat.setRandom();// = MatrixType::random(mat.rows(), mat.cols());
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename MatrixType> void initMatrix_identity(MatrixType& mat) __attribute__((noinline));
|
||||||
|
template<typename MatrixType> void initMatrix_identity(MatrixType& mat)
|
||||||
|
{
|
||||||
|
mat.setIdentity();
|
||||||
|
}
|
55
bench/README.txt
Normal file
55
bench/README.txt
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
|
||||||
|
This folder contains a couple of benchmark utities and Eigen benchmarks.
|
||||||
|
|
||||||
|
****************************
|
||||||
|
* bench_multi_compilers.sh *
|
||||||
|
****************************
|
||||||
|
|
||||||
|
This script allows to run a benchmark on a set of different compilers/compiler options.
|
||||||
|
It takes two arguments:
|
||||||
|
- a file defining the list of the compilers with their options
|
||||||
|
- the .cpp file of the benchmark
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
$ ./bench_multi_compilers.sh basicbench.cxxlist basicbenchmark.cpp
|
||||||
|
|
||||||
|
g++-4.1 -O3 -DNDEBUG -finline-limit=10000
|
||||||
|
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||||
|
0.271102 0.131416 0.422322 0.198633
|
||||||
|
0.201658 0.102436 0.397566 0.207282
|
||||||
|
|
||||||
|
g++-4.2 -O3 -DNDEBUG -finline-limit=10000
|
||||||
|
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||||
|
0.107805 0.0890579 0.30265 0.161843
|
||||||
|
0.127157 0.0712581 0.278341 0.191029
|
||||||
|
|
||||||
|
g++-4.3 -O3 -DNDEBUG -finline-limit=10000
|
||||||
|
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||||
|
0.134318 0.105291 0.3704 0.180966
|
||||||
|
0.137703 0.0732472 0.31225 0.202204
|
||||||
|
|
||||||
|
icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size
|
||||||
|
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
|
||||||
|
0.226145 0.0941319 0.371873 0.159433
|
||||||
|
0.109302 0.0837538 0.328102 0.173891
|
||||||
|
|
||||||
|
|
||||||
|
$ ./bench_multi_compilers.sh ompbench.cxxlist ompbenchmark.cpp
|
||||||
|
|
||||||
|
g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp
|
||||||
|
double, fixed-size 4x4: 0.00165105s 0.0778739s
|
||||||
|
double, 32x32: 0.0654769s 0.075289s => x0.869674 (2)
|
||||||
|
double, 128x128: 0.054148s 0.0419669s => x1.29025 (2)
|
||||||
|
double, 512x512: 0.913799s 0.428533s => x2.13239 (2)
|
||||||
|
double, 1024x1024: 14.5972s 9.3542s => x1.5605 (2)
|
||||||
|
|
||||||
|
icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp
|
||||||
|
double, fixed-size 4x4: 0.000589848s 0.019949s
|
||||||
|
double, 32x32: 0.0682781s 0.0449722s => x1.51823 (2)
|
||||||
|
double, 128x128: 0.0547509s 0.0435519s => x1.25714 (2)
|
||||||
|
double, 512x512: 0.829436s 0.424438s => x1.9542 (2)
|
||||||
|
double, 1024x1024: 14.5243s 10.7735s => x1.34815 (2)
|
||||||
|
|
||||||
|
|
||||||
|
|
12
bench/basicbench.cxxlist
Normal file
12
bench/basicbench.cxxlist
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG"
|
||||||
|
CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000"
|
||||||
|
|
||||||
|
CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG"
|
||||||
|
CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000"
|
||||||
|
|
||||||
|
CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG"
|
||||||
|
CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000"
|
||||||
|
|
||||||
|
CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size"
|
46
bench/basicbenchmark.cpp
Normal file
46
bench/basicbenchmark.cpp
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
|
||||||
|
#include "BenchUtil.h"
|
||||||
|
#include "basicbenchmark.h"
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
// disbale floating point exceptions
|
||||||
|
// this leads to more stable bench results
|
||||||
|
// (this is done by default by ICC)
|
||||||
|
#ifndef __INTEL_COMPILER
|
||||||
|
{
|
||||||
|
int aux;
|
||||||
|
asm(
|
||||||
|
"stmxcsr %[aux] \n\t"
|
||||||
|
"orl $32832, %[aux] \n\t"
|
||||||
|
"ldmxcsr %[aux] \n\t"
|
||||||
|
: : [aux] "m" (aux));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// this is the list of matrix type and size we want to bench:
|
||||||
|
// ((suffix) (matrix size) (number of iterations))
|
||||||
|
#define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000))
|
||||||
|
// #define MODES ((Xd)(20)(10000))
|
||||||
|
|
||||||
|
#define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \
|
||||||
|
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \
|
||||||
|
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / "
|
||||||
|
|
||||||
|
std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl;
|
||||||
|
|
||||||
|
const int tries = 10;
|
||||||
|
|
||||||
|
#define _RUN_BENCH(R,ARG,EL) \
|
||||||
|
std::cout << ARG( \
|
||||||
|
BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\
|
||||||
|
BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \
|
||||||
|
<< " ";
|
||||||
|
|
||||||
|
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES );
|
||||||
|
std::cout << endl;
|
||||||
|
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES );
|
||||||
|
std::cout << endl;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
59
bench/basicbenchmark.h
Normal file
59
bench/basicbenchmark.h
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
|
||||||
|
enum {LazyEval, EarlyEval, OmpEval};
|
||||||
|
|
||||||
|
template<int Mode, typename MatrixType>
|
||||||
|
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline));
|
||||||
|
|
||||||
|
template<int Mode, typename MatrixType>
|
||||||
|
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations)
|
||||||
|
{
|
||||||
|
for(int a = 0; a < iterations; a++)
|
||||||
|
{
|
||||||
|
if (Mode==LazyEval)
|
||||||
|
{
|
||||||
|
asm("#begin_bench_loop LazyEval");
|
||||||
|
if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
|
||||||
|
m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
|
||||||
|
}
|
||||||
|
else if (Mode==OmpEval)
|
||||||
|
{
|
||||||
|
asm("#begin_bench_loop OmpEval");
|
||||||
|
if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
|
||||||
|
m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
asm("#begin_bench_loop EarlyEval");
|
||||||
|
if (MatrixType::Traits::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
|
||||||
|
m = I + 0.00005 * (m + m * m);
|
||||||
|
}
|
||||||
|
asm("#end_bench_loop");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int Mode, typename MatrixType>
|
||||||
|
double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline));
|
||||||
|
|
||||||
|
template<int Mode, typename MatrixType>
|
||||||
|
double benchBasic(const MatrixType& mat, int iterations, int tries)
|
||||||
|
{
|
||||||
|
const int rows = mat.rows();
|
||||||
|
const int cols = mat.cols();
|
||||||
|
|
||||||
|
MatrixType I(rows,cols);
|
||||||
|
MatrixType m(rows,cols);
|
||||||
|
|
||||||
|
initMatrix_identity(I);
|
||||||
|
|
||||||
|
Eigen::BenchTimer timer;
|
||||||
|
for(uint t=0; t<tries; ++t)
|
||||||
|
{
|
||||||
|
initMatrix_random(m);
|
||||||
|
timer.start();
|
||||||
|
benchBasic_loop<Mode>(I, m, iterations);
|
||||||
|
timer.stop();
|
||||||
|
cerr << m;
|
||||||
|
}
|
||||||
|
return timer.value();
|
||||||
|
};
|
||||||
|
|
28
bench/bench_multi_compilers.sh
Executable file
28
bench/bench_multi_compilers.sh
Executable file
@ -0,0 +1,28 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
if (($# < 2)); then
|
||||||
|
echo "Usage: $0 compilerlist.txt benchfile.cpp"
|
||||||
|
else
|
||||||
|
|
||||||
|
compilerlist=$1
|
||||||
|
benchfile=$2
|
||||||
|
|
||||||
|
g=0
|
||||||
|
source $compilerlist
|
||||||
|
|
||||||
|
# for each compiler, compile benchfile and run the benchmark
|
||||||
|
for (( i=0 ; i<g ; ++i )) ; do
|
||||||
|
# check the compiler exists
|
||||||
|
compiler=`echo ${CLIST[$i]} | cut -d " " -f 1`
|
||||||
|
if [ -e `which $compiler` ]; then
|
||||||
|
echo "${CLIST[$i]}"
|
||||||
|
# echo "${CLIST[$i]} $benchfile -I.. -o bench~"
|
||||||
|
if [ -e ./.bench ] ; then rm .bench; fi
|
||||||
|
${CLIST[$i]} $benchfile -I.. -o .bench && ./.bench 2> /dev/null
|
||||||
|
echo ""
|
||||||
|
else
|
||||||
|
echo "compiler not found: $compiler"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
fi
|
@ -13,7 +13,7 @@ int main(int argc, char *argv[])
|
|||||||
{
|
{
|
||||||
m(i,j) = 0.1 * (i+20*j);
|
m(i,j) = 0.1 * (i+20*j);
|
||||||
}
|
}
|
||||||
for(int a = 0; a < 1000000; a++)
|
for(int a = 0; a < 100000; a++)
|
||||||
{
|
{
|
||||||
m = I + 0.00005 * (m + m*m);
|
m = I + 0.00005 * (m + m*m);
|
||||||
}
|
}
|
7
bench/ompbench.cxxlist
Normal file
7
bench/ompbench.cxxlist
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
|
||||||
|
|
||||||
|
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=10000 -fopenmp"
|
||||||
|
|
||||||
|
CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp"
|
81
bench/ompbenchmark.cpp
Normal file
81
bench/ompbenchmark.cpp
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
// g++ -O3 -DNDEBUG -I.. -fopenmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
|
||||||
|
// icpc -fast -fno-exceptions -DNDEBUG -I.. -openmp benchOpenMP.cpp -o benchOpenMP && ./benchOpenMP 2> /dev/null
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
#include "BenchUtil.h"
|
||||||
|
#include "basicbenchmark.h"
|
||||||
|
|
||||||
|
// #include <Eigen/Core>
|
||||||
|
// #include "BenchTimer.h"
|
||||||
|
//
|
||||||
|
// using namespace std;
|
||||||
|
// USING_PART_OF_NAMESPACE_EIGEN
|
||||||
|
//
|
||||||
|
// enum {LazyEval, EarlyEval, OmpEval};
|
||||||
|
//
|
||||||
|
// template<int Mode, typename MatrixType>
|
||||||
|
// double benchSingleProc(const MatrixType& mat, int iterations, int tries) __attribute__((noinline));
|
||||||
|
//
|
||||||
|
// template<int Mode, typename MatrixType>
|
||||||
|
// double benchBasic(const MatrixType& mat, int iterations, int tries)
|
||||||
|
// {
|
||||||
|
// const int rows = mat.rows();
|
||||||
|
// const int cols = mat.cols();
|
||||||
|
//
|
||||||
|
// Eigen::BenchTimer timer;
|
||||||
|
// for(uint t=0; t<tries; ++t)
|
||||||
|
// {
|
||||||
|
// MatrixType I = MatrixType::identity(rows, cols);
|
||||||
|
// MatrixType m = MatrixType::random(rows, cols);
|
||||||
|
//
|
||||||
|
// timer.start();
|
||||||
|
// for(int a = 0; a < iterations; a++)
|
||||||
|
// {
|
||||||
|
// if(Mode==LazyEval)
|
||||||
|
// m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
|
||||||
|
// else if(Mode==OmpEval)
|
||||||
|
// m = (I + 0.00005 * (m + m.lazyProduct(m))).evalOMP();
|
||||||
|
// else
|
||||||
|
// m = I + 0.00005 * (m + m * m);
|
||||||
|
// }
|
||||||
|
// timer.stop();
|
||||||
|
// cerr << m;
|
||||||
|
// }
|
||||||
|
// return timer.value();
|
||||||
|
// };
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
// disbale floating point exceptions
|
||||||
|
// this leads to more stable bench results
|
||||||
|
{
|
||||||
|
int aux;
|
||||||
|
asm(
|
||||||
|
"stmxcsr %[aux] \n\t"
|
||||||
|
"orl $32832, %[aux] \n\t"
|
||||||
|
"ldmxcsr %[aux] \n\t"
|
||||||
|
: : [aux] "m" (aux));
|
||||||
|
}
|
||||||
|
|
||||||
|
// commented since the default setting is use as many threads as processors
|
||||||
|
//omp_set_num_threads(omp_get_num_procs());
|
||||||
|
|
||||||
|
std::cout << "double, fixed-size 4x4: "
|
||||||
|
<< benchBasic<LazyEval>(Matrix4d(), 10000, 10) << "s "
|
||||||
|
<< benchBasic<OmpEval>(Matrix4d(), 10000, 10) << "s \n";
|
||||||
|
|
||||||
|
#define BENCH_MATRIX(TYPE, SIZE, ITERATIONS, TRIES) {\
|
||||||
|
double single = benchBasic<LazyEval>(Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
|
||||||
|
double omp = benchBasic<OmpEval> (Matrix<TYPE,Eigen::Dynamic,Eigen::Dynamic>(SIZE,SIZE), ITERATIONS, TRIES); \
|
||||||
|
std::cout << #TYPE << ", " << #SIZE << "x" << #SIZE << ": " << single << "s " << omp << "s " \
|
||||||
|
<< " => x" << single/omp << " (" << omp_get_num_procs() << ")" << std::endl; \
|
||||||
|
}
|
||||||
|
|
||||||
|
BENCH_MATRIX(double, 32, 1000, 10);
|
||||||
|
BENCH_MATRIX(double, 128, 10, 10);
|
||||||
|
BENCH_MATRIX(double, 512, 1, 6);
|
||||||
|
BENCH_MATRIX(double, 1024, 1, 4);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -75,7 +75,7 @@ Eigen is well tested with recent versions of GCC and ICC. Both GCC 4.2 and ICC g
|
|||||||
|
|
||||||
For best performance, we recommend the following compilation flags:
|
For best performance, we recommend the following compilation flags:
|
||||||
<ul>
|
<ul>
|
||||||
<li>\b GCC: \c -O3 \c -DNDEBUG \c -finline-limit=10000 \c -falign-functions=64 </li>
|
<li>\b GCC: \c -O3 \c -DNDEBUG \c -finline-limit=10000 </li>
|
||||||
<li>\b ICC: \c -O3 \c -DNDEBUG \c -xT \c -ipo \c -no-prec-div \c -no-inline-max-size </li>
|
<li>\b ICC: \c -O3 \c -DNDEBUG \c -xT \c -ipo \c -no-prec-div \c -no-inline-max-size </li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user