mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-02 03:05:10 +08:00
Clang-format tests, examples, libraries, benchmarks, etc.
This commit is contained in:
parent
3252ecc7a4
commit
46e9cdb7fe
@ -24,33 +24,25 @@ typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
|
||||
typedef Matrix<Scalar, Dynamic, 1> DenseVector;
|
||||
typedef SparseMatrix<Scalar> EigenSparseMatrix;
|
||||
|
||||
void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst)
|
||||
{
|
||||
void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst) {
|
||||
dst.reserve(double(rows) * cols * density);
|
||||
for(int j = 0; j < cols; j++)
|
||||
{
|
||||
for(int i = 0; i < rows; i++)
|
||||
{
|
||||
for (int j = 0; j < cols; j++) {
|
||||
for (int i = 0; i < rows; i++) {
|
||||
Scalar v = (internal::random<float>(0, 1) < density) ? internal::random<Scalar>() : 0;
|
||||
if (v!=0)
|
||||
dst.insert(i,j) = v;
|
||||
if (v != 0) dst.insert(i, j) = v;
|
||||
}
|
||||
}
|
||||
dst.finalize();
|
||||
}
|
||||
|
||||
void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst)
|
||||
{
|
||||
void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst) {
|
||||
// std::cout << "alloc " << nnzPerCol*cols << "\n";
|
||||
dst.reserve(nnzPerCol * cols);
|
||||
for(int j = 0; j < cols; j++)
|
||||
{
|
||||
for (int j = 0; j < cols; j++) {
|
||||
std::set<int> aux;
|
||||
for(int i = 0; i < nnzPerCol; i++)
|
||||
{
|
||||
for (int i = 0; i < nnzPerCol; i++) {
|
||||
int k = internal::random<int>(0, rows - 1);
|
||||
while (aux.find(k)!=aux.end())
|
||||
k = internal::random<int>(0,rows-1);
|
||||
while (aux.find(k) != aux.end()) k = internal::random<int>(0, rows - 1);
|
||||
aux.insert(k);
|
||||
|
||||
dst.insert(k, j) = internal::random<Scalar>();
|
||||
@ -59,24 +51,20 @@ void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst)
|
||||
dst.finalize();
|
||||
}
|
||||
|
||||
void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst)
|
||||
{
|
||||
void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst) {
|
||||
dst.setZero();
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
|
||||
dst(it.index(),j) = it.value();
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value();
|
||||
}
|
||||
|
||||
#ifndef NOGMM
|
||||
#include "gmm/gmm.h"
|
||||
typedef gmm::csc_matrix<Scalar> GmmSparse;
|
||||
typedef gmm::col_matrix<gmm::wsvector<Scalar> > GmmDynSparse;
|
||||
void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst)
|
||||
{
|
||||
void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst) {
|
||||
GmmDynSparse tmp(src.rows(), src.cols());
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
|
||||
tmp(it.index(),j) = it.value();
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) tmp(it.index(), j) = it.value();
|
||||
gmm::copy(tmp, dst);
|
||||
}
|
||||
#endif
|
||||
@ -85,12 +73,10 @@ void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst)
|
||||
#include <boost/numeric/mtl/mtl.hpp>
|
||||
typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::col_major> > MtlSparse;
|
||||
typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::row_major> > MtlSparseRowMajor;
|
||||
void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst)
|
||||
{
|
||||
void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst) {
|
||||
mtl::matrix::inserter<MtlSparse> ins(dst);
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
|
||||
ins[it.index()][j] = it.value();
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) ins[it.index()][j] = it.value();
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -98,13 +84,11 @@ void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst)
|
||||
extern "C" {
|
||||
#include "cs.h"
|
||||
}
|
||||
void eiToCSparse(const EigenSparseMatrix& src, cs* &dst)
|
||||
{
|
||||
void eiToCSparse(const EigenSparseMatrix& src, cs*& dst) {
|
||||
cs* aux = cs_spalloc(0, 0, 1, 1, 1);
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
|
||||
if (!cs_entry(aux, it.index(), j, it.value()))
|
||||
{
|
||||
if (!cs_entry(aux, it.index(), j, it.value())) {
|
||||
std::cout << "cs_entry error\n";
|
||||
exit(2);
|
||||
}
|
||||
@ -125,20 +109,16 @@ void eiToCSparse(const EigenSparseMatrix& src, cs* &dst)
|
||||
|
||||
typedef boost::numeric::ublas::compressed_matrix<Scalar, boost::numeric::ublas::column_major> UBlasSparse;
|
||||
|
||||
void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst)
|
||||
{
|
||||
void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst) {
|
||||
dst.resize(src.rows(), src.cols(), false);
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
|
||||
dst(it.index(),j) = it.value();
|
||||
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value();
|
||||
}
|
||||
|
||||
template <typename EigenType, typename UblasType>
|
||||
void eiToUblasVec(const EigenType& src, UblasType& dst)
|
||||
{
|
||||
void eiToUblasVec(const EigenType& src, UblasType& dst) {
|
||||
dst.resize(src.size());
|
||||
for (int j=0; j<src.size(); ++j)
|
||||
dst[j] = src.coeff(j);
|
||||
for (int j = 0; j < src.size(); ++j) dst[j] = src.coeff(j);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -41,13 +41,9 @@ static void clobber() {
|
||||
|
||||
#include <Eigen/Core>
|
||||
|
||||
namespace Eigen
|
||||
{
|
||||
namespace Eigen {
|
||||
|
||||
enum {
|
||||
CPU_TIMER = 0,
|
||||
REAL_TIMER = 1
|
||||
};
|
||||
enum { CPU_TIMER = 0, REAL_TIMER = 1 };
|
||||
|
||||
/** Elapsed time timer keeping the best try.
|
||||
*
|
||||
@ -56,12 +52,9 @@ enum {
|
||||
*
|
||||
* Important: on linux, you must link with -lrt
|
||||
*/
|
||||
class BenchTimer
|
||||
{
|
||||
class BenchTimer {
|
||||
public:
|
||||
|
||||
BenchTimer()
|
||||
{
|
||||
BenchTimer() {
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
LARGE_INTEGER freq;
|
||||
QueryPerformanceFrequency(&freq);
|
||||
@ -72,19 +65,16 @@ public:
|
||||
|
||||
~BenchTimer() {}
|
||||
|
||||
inline void reset()
|
||||
{
|
||||
inline void reset() {
|
||||
m_bests.fill(1e9);
|
||||
m_worsts.fill(0);
|
||||
m_totals.setZero();
|
||||
}
|
||||
inline void start()
|
||||
{
|
||||
inline void start() {
|
||||
m_starts[CPU_TIMER] = getCpuTime();
|
||||
m_starts[REAL_TIMER] = getRealTime();
|
||||
}
|
||||
inline void stop()
|
||||
{
|
||||
inline void stop() {
|
||||
m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER];
|
||||
m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER];
|
||||
#if EIGEN_VERSION_AT_LEAST(2, 90, 0)
|
||||
@ -101,34 +91,21 @@ public:
|
||||
|
||||
/** Return the elapsed time in seconds between the last start/stop pair
|
||||
*/
|
||||
inline double value(int TIMER = CPU_TIMER) const
|
||||
{
|
||||
return m_times[TIMER];
|
||||
}
|
||||
inline double value(int TIMER = CPU_TIMER) const { return m_times[TIMER]; }
|
||||
|
||||
/** Return the best elapsed time in seconds
|
||||
*/
|
||||
inline double best(int TIMER = CPU_TIMER) const
|
||||
{
|
||||
return m_bests[TIMER];
|
||||
}
|
||||
inline double best(int TIMER = CPU_TIMER) const { return m_bests[TIMER]; }
|
||||
|
||||
/** Return the worst elapsed time in seconds
|
||||
*/
|
||||
inline double worst(int TIMER = CPU_TIMER) const
|
||||
{
|
||||
return m_worsts[TIMER];
|
||||
}
|
||||
inline double worst(int TIMER = CPU_TIMER) const { return m_worsts[TIMER]; }
|
||||
|
||||
/** Return the total elapsed time in seconds.
|
||||
*/
|
||||
inline double total(int TIMER = CPU_TIMER) const
|
||||
{
|
||||
return m_totals[TIMER];
|
||||
}
|
||||
inline double total(int TIMER = CPU_TIMER) const { return m_totals[TIMER]; }
|
||||
|
||||
inline double getCpuTime() const
|
||||
{
|
||||
inline double getCpuTime() const {
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER query_ticks;
|
||||
QueryPerformanceCounter(&query_ticks);
|
||||
@ -142,8 +119,7 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
inline double getRealTime() const
|
||||
{
|
||||
inline double getRealTime() const {
|
||||
#ifdef _WIN32
|
||||
SYSTEMTIME st;
|
||||
GetSystemTime(&st);
|
||||
@ -171,7 +147,8 @@ public:
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
|
||||
};
|
||||
|
||||
#define BENCH(TIMER,TRIES,REP,CODE) { \
|
||||
#define BENCH(TIMER, TRIES, REP, CODE) \
|
||||
{ \
|
||||
TIMER.reset(); \
|
||||
for (int uglyvarname1 = 0; uglyvarname1 < TRIES; ++uglyvarname1) { \
|
||||
TIMER.start(); \
|
||||
@ -183,7 +160,7 @@ public:
|
||||
} \
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace Eigen
|
||||
|
||||
// clean #defined tokens
|
||||
#ifdef EIGEN_BT_UNDEF_NOMINMAX
|
||||
|
@ -18,26 +18,29 @@ using namespace Eigen;
|
||||
#include <boost/preprocessor/punctuation/comma.hpp>
|
||||
#include <boost/preprocessor/stringize.hpp>
|
||||
|
||||
template<typename MatrixType> void initMatrix_random(MatrixType& mat) __attribute__((noinline));
|
||||
template<typename MatrixType> void initMatrix_random(MatrixType& mat)
|
||||
{
|
||||
template <typename MatrixType>
|
||||
void initMatrix_random(MatrixType& mat) __attribute__((noinline));
|
||||
template <typename MatrixType>
|
||||
void initMatrix_random(MatrixType& mat) {
|
||||
mat.setRandom(); // = MatrixType::random(mat.rows(), mat.cols());
|
||||
}
|
||||
|
||||
template<typename MatrixType> void initMatrix_identity(MatrixType& mat) __attribute__((noinline));
|
||||
template<typename MatrixType> void initMatrix_identity(MatrixType& mat)
|
||||
{
|
||||
template <typename MatrixType>
|
||||
void initMatrix_identity(MatrixType& mat) __attribute__((noinline));
|
||||
template <typename MatrixType>
|
||||
void initMatrix_identity(MatrixType& mat) {
|
||||
mat.setIdentity();
|
||||
}
|
||||
|
||||
#ifndef __INTEL_COMPILER
|
||||
#define DISABLE_SSE_EXCEPTIONS() { \
|
||||
#define DISABLE_SSE_EXCEPTIONS() \
|
||||
{ \
|
||||
int aux; \
|
||||
asm( \
|
||||
"stmxcsr %[aux] \n\t" \
|
||||
asm("stmxcsr %[aux] \n\t" \
|
||||
"orl $32832, %[aux] \n\t" \
|
||||
"ldmxcsr %[aux] \n\t" \
|
||||
: : [aux] "m" (aux)); \
|
||||
: \
|
||||
: [aux] "m"(aux)); \
|
||||
}
|
||||
#else
|
||||
#define DISABLE_SSE_EXCEPTIONS()
|
||||
@ -46,26 +49,21 @@ template<typename MatrixType> void initMatrix_identity(MatrixType& mat)
|
||||
#ifdef BENCH_GMM
|
||||
#include <gmm/gmm.h>
|
||||
template <typename EigenMatrixType, typename GmmMatrixType>
|
||||
void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst)
|
||||
{
|
||||
void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst) {
|
||||
dst.resize(src.rows(), src.cols());
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (int i=0; i<src.rows(); ++i)
|
||||
dst(i,j) = src.coeff(i,j);
|
||||
for (int i = 0; i < src.rows(); ++i) dst(i, j) = src.coeff(i, j);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef BENCH_GSL
|
||||
#include <gsl/gsl_matrix.h>
|
||||
#include <gsl/gsl_linalg.h>
|
||||
#include <gsl/gsl_eigen.h>
|
||||
template <typename EigenMatrixType>
|
||||
void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
|
||||
{
|
||||
void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst) {
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (int i=0; i<src.rows(); ++i)
|
||||
gsl_matrix_set(*dst, i, j, src.coeff(i,j));
|
||||
for (int i = 0; i < src.rows(); ++i) gsl_matrix_set(*dst, i, j, src.coeff(i, j));
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -73,19 +71,15 @@ void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
|
||||
#include <boost/numeric/ublas/matrix.hpp>
|
||||
#include <boost/numeric/ublas/vector.hpp>
|
||||
template <typename EigenMatrixType, typename UblasMatrixType>
|
||||
void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst)
|
||||
{
|
||||
void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst) {
|
||||
dst.resize(src.rows(), src.cols());
|
||||
for (int j = 0; j < src.cols(); ++j)
|
||||
for (int i=0; i<src.rows(); ++i)
|
||||
dst(i,j) = src.coeff(i,j);
|
||||
for (int i = 0; i < src.rows(); ++i) dst(i, j) = src.coeff(i, j);
|
||||
}
|
||||
template <typename EigenType, typename UblasType>
|
||||
void eiToUblasVec(const EigenType& src, UblasType& dst)
|
||||
{
|
||||
void eiToUblasVec(const EigenType& src, UblasType& dst) {
|
||||
dst.resize(src.size());
|
||||
for (int j=0; j<src.size(); ++j)
|
||||
dst[j] = src.coeff(j);
|
||||
for (int j = 0; j < src.size(); ++j) dst[j] = src.coeff(j);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -37,20 +37,17 @@ uint8_t log2_pot(size_t x) {
|
||||
return l;
|
||||
}
|
||||
|
||||
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
|
||||
{
|
||||
uint16_t compact_size_triple(size_t k, size_t m, size_t n) {
|
||||
return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
|
||||
}
|
||||
|
||||
// just a helper to store a triple of K,M,N sizes for matrix product
|
||||
struct size_triple_t
|
||||
{
|
||||
struct size_triple_t {
|
||||
uint16_t k, m, n;
|
||||
size_triple_t() : k(0), m(0), n(0) {}
|
||||
size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
|
||||
size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}
|
||||
size_triple_t(uint16_t compact)
|
||||
{
|
||||
size_triple_t(uint16_t compact) {
|
||||
k = 1 << ((compact & 0xf00) >> 8);
|
||||
m = 1 << ((compact & 0x0f0) >> 4);
|
||||
n = 1 << ((compact & 0x00f) >> 0);
|
||||
@ -58,35 +55,23 @@ struct size_triple_t
|
||||
bool is_cubic() const { return k == m && m == n; }
|
||||
};
|
||||
|
||||
ostream& operator<<(ostream& s, const size_triple_t& t)
|
||||
{
|
||||
return s << "(" << t.k << ", " << t.m << ", " << t.n << ")";
|
||||
}
|
||||
ostream& operator<<(ostream& s, const size_triple_t& t) { return s << "(" << t.k << ", " << t.m << ", " << t.n << ")"; }
|
||||
|
||||
struct inputfile_entry_t
|
||||
{
|
||||
struct inputfile_entry_t {
|
||||
uint16_t product_size;
|
||||
uint16_t pot_block_size;
|
||||
size_triple_t nonpot_block_size;
|
||||
float gflops;
|
||||
};
|
||||
|
||||
struct inputfile_t
|
||||
{
|
||||
enum class type_t {
|
||||
unknown,
|
||||
all_pot_sizes,
|
||||
default_sizes
|
||||
};
|
||||
struct inputfile_t {
|
||||
enum class type_t { unknown, all_pot_sizes, default_sizes };
|
||||
|
||||
string filename;
|
||||
vector<inputfile_entry_t> entries;
|
||||
type_t type;
|
||||
|
||||
inputfile_t(const string& fname)
|
||||
: filename(fname)
|
||||
, type(type_t::unknown)
|
||||
{
|
||||
inputfile_t(const string& fname) : filename(fname), type(type_t::unknown) {
|
||||
ifstream stream(filename);
|
||||
if (!stream.is_open()) {
|
||||
cerr << "couldn't open input file: " << filename << endl;
|
||||
@ -112,7 +97,6 @@ struct inputfile_t
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (type == type_t::unknown) {
|
||||
continue;
|
||||
}
|
||||
@ -120,18 +104,9 @@ struct inputfile_t
|
||||
case type_t::all_pot_sizes: {
|
||||
unsigned int product_size, block_size;
|
||||
float gflops;
|
||||
int sscanf_result =
|
||||
sscanf(line.c_str(), "%x %x %f",
|
||||
&product_size,
|
||||
&block_size,
|
||||
&gflops);
|
||||
if (3 != sscanf_result ||
|
||||
!product_size ||
|
||||
product_size > 0xfff ||
|
||||
!block_size ||
|
||||
block_size > 0xfff ||
|
||||
!isfinite(gflops))
|
||||
{
|
||||
int sscanf_result = sscanf(line.c_str(), "%x %x %f", &product_size, &block_size, &gflops);
|
||||
if (3 != sscanf_result || !product_size || product_size > 0xfff || !block_size || block_size > 0xfff ||
|
||||
!isfinite(gflops)) {
|
||||
cerr << "ill-formed input file: " << filename << endl;
|
||||
cerr << "offending line:" << endl << line << endl;
|
||||
exit(1);
|
||||
@ -150,16 +125,8 @@ struct inputfile_t
|
||||
unsigned int product_size;
|
||||
float gflops;
|
||||
int bk, bm, bn;
|
||||
int sscanf_result =
|
||||
sscanf(line.c_str(), "%x default(%d, %d, %d) %f",
|
||||
&product_size,
|
||||
&bk, &bm, &bn,
|
||||
&gflops);
|
||||
if (5 != sscanf_result ||
|
||||
!product_size ||
|
||||
product_size > 0xfff ||
|
||||
!isfinite(gflops))
|
||||
{
|
||||
int sscanf_result = sscanf(line.c_str(), "%x default(%d, %d, %d) %f", &product_size, &bk, &bm, &bn, &gflops);
|
||||
if (5 != sscanf_result || !product_size || product_size > 0xfff || !isfinite(gflops)) {
|
||||
cerr << "ill-formed input file: " << filename << endl;
|
||||
cerr << "offending line:" << endl << line << endl;
|
||||
exit(1);
|
||||
@ -192,27 +159,22 @@ struct inputfile_t
|
||||
}
|
||||
};
|
||||
|
||||
struct preprocessed_inputfile_entry_t
|
||||
{
|
||||
struct preprocessed_inputfile_entry_t {
|
||||
uint16_t product_size;
|
||||
uint16_t block_size;
|
||||
|
||||
float efficiency;
|
||||
};
|
||||
|
||||
bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2)
|
||||
{
|
||||
bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2) {
|
||||
return e1.efficiency < e2.efficiency;
|
||||
}
|
||||
|
||||
struct preprocessed_inputfile_t
|
||||
{
|
||||
struct preprocessed_inputfile_t {
|
||||
string filename;
|
||||
vector<preprocessed_inputfile_entry_t> entries;
|
||||
|
||||
preprocessed_inputfile_t(const inputfile_t& inputfile)
|
||||
: filename(inputfile.filename)
|
||||
{
|
||||
preprocessed_inputfile_t(const inputfile_t& inputfile) : filename(inputfile.filename) {
|
||||
if (inputfile.type != inputfile_t::type_t::all_pot_sizes) {
|
||||
abort();
|
||||
}
|
||||
@ -220,9 +182,7 @@ struct preprocessed_inputfile_t
|
||||
auto it_first_with_given_product_size = it;
|
||||
while (it != inputfile.entries.end()) {
|
||||
++it;
|
||||
if (it == inputfile.entries.end() ||
|
||||
it->product_size != it_first_with_given_product_size->product_size)
|
||||
{
|
||||
if (it == inputfile.entries.end() || it->product_size != it_first_with_given_product_size->product_size) {
|
||||
import_input_file_range_one_product_size(it_first_with_given_product_size, it);
|
||||
it_first_with_given_product_size = it;
|
||||
}
|
||||
@ -230,10 +190,8 @@ struct preprocessed_inputfile_t
|
||||
}
|
||||
|
||||
private:
|
||||
void import_input_file_range_one_product_size(
|
||||
const vector<inputfile_entry_t>::const_iterator& begin,
|
||||
const vector<inputfile_entry_t>::const_iterator& end)
|
||||
{
|
||||
void import_input_file_range_one_product_size(const vector<inputfile_entry_t>::const_iterator& begin,
|
||||
const vector<inputfile_entry_t>::const_iterator& end) {
|
||||
uint16_t product_size = begin->product_size;
|
||||
float max_gflops = 0.0f;
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
@ -254,9 +212,7 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
void check_all_files_in_same_exact_order(
|
||||
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles)
|
||||
{
|
||||
void check_all_files_in_same_exact_order(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles) {
|
||||
if (preprocessed_inputfiles.empty()) {
|
||||
return;
|
||||
}
|
||||
@ -266,11 +222,8 @@ void check_all_files_in_same_exact_order(
|
||||
|
||||
for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) {
|
||||
if (preprocessed_inputfiles[i].entries.size() != num_entries) {
|
||||
cerr << "these files have different number of entries: "
|
||||
<< preprocessed_inputfiles[i].filename
|
||||
<< " and "
|
||||
<< first_file.filename
|
||||
<< endl;
|
||||
cerr << "these files have different number of entries: " << preprocessed_inputfiles[i].filename << " and "
|
||||
<< first_file.filename << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@ -281,12 +234,8 @@ void check_all_files_in_same_exact_order(
|
||||
for (size_t file_index = 0; file_index < preprocessed_inputfiles.size(); file_index++) {
|
||||
const preprocessed_inputfile_t& cur_file = preprocessed_inputfiles[file_index];
|
||||
if (cur_file.entries[entry_index].product_size != entry_product_size ||
|
||||
cur_file.entries[entry_index].block_size != entry_block_size)
|
||||
{
|
||||
cerr << "entries not in same order between these files: "
|
||||
<< first_file.filename
|
||||
<< " and "
|
||||
<< cur_file.filename
|
||||
cur_file.entries[entry_index].block_size != entry_block_size) {
|
||||
cerr << "entries not in same order between these files: " << first_file.filename << " and " << cur_file.filename
|
||||
<< endl;
|
||||
exit(1);
|
||||
}
|
||||
@ -294,10 +243,8 @@ void check_all_files_in_same_exact_order(
|
||||
}
|
||||
}
|
||||
|
||||
float efficiency_of_subset(
|
||||
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<size_t>& subset)
|
||||
{
|
||||
float efficiency_of_subset(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<size_t>& subset) {
|
||||
if (subset.size() <= 1) {
|
||||
return 1.0f;
|
||||
}
|
||||
@ -309,9 +256,7 @@ float efficiency_of_subset(
|
||||
uint16_t product_size = first_file.entries[0].product_size;
|
||||
while (entry_index < num_entries) {
|
||||
++entry_index;
|
||||
if (entry_index == num_entries ||
|
||||
first_file.entries[entry_index].product_size != product_size)
|
||||
{
|
||||
if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) {
|
||||
float efficiency_this_product_size = 0.0f;
|
||||
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
|
||||
float efficiency_this_entry = 1.0f;
|
||||
@ -331,10 +276,8 @@ float efficiency_of_subset(
|
||||
return efficiency;
|
||||
}
|
||||
|
||||
void dump_table_for_subset(
|
||||
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<size_t>& subset)
|
||||
{
|
||||
void dump_table_for_subset(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<size_t>& subset) {
|
||||
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
|
||||
const size_t num_entries = first_file.entries.size();
|
||||
size_t entry_index = 0;
|
||||
@ -359,9 +302,7 @@ void dump_table_for_subset(
|
||||
cout << " static const unsigned short data[" << TableSize << "] = {";
|
||||
while (entry_index < num_entries) {
|
||||
++entry_index;
|
||||
if (entry_index == num_entries ||
|
||||
first_file.entries[entry_index].product_size != product_size)
|
||||
{
|
||||
if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) {
|
||||
float best_efficiency_this_product_size = 0.0f;
|
||||
uint16_t best_block_size_this_product_size = 0;
|
||||
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
|
||||
@ -397,10 +338,8 @@ void dump_table_for_subset(
|
||||
cout << "};" << endl;
|
||||
}
|
||||
|
||||
float efficiency_of_partition(
|
||||
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<vector<size_t>>& partition)
|
||||
{
|
||||
float efficiency_of_partition(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<vector<size_t>>& partition) {
|
||||
float efficiency = 1.0f;
|
||||
for (auto s = partition.begin(); s != partition.end(); ++s) {
|
||||
efficiency = min(efficiency, efficiency_of_subset(preprocessed_inputfiles, *s));
|
||||
@ -408,8 +347,7 @@ float efficiency_of_partition(
|
||||
return efficiency;
|
||||
}
|
||||
|
||||
void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t set_size)
|
||||
{
|
||||
void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t set_size) {
|
||||
assert(subset_size >= 1 && subset_size <= set_size);
|
||||
out_subset.resize(subset_size);
|
||||
for (size_t i = 0; i < subset_size; i++) {
|
||||
@ -417,13 +355,9 @@ void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t se
|
||||
}
|
||||
}
|
||||
|
||||
bool is_last_subset(const vector<size_t>& subset, size_t set_size)
|
||||
{
|
||||
return subset[0] == set_size - subset.size();
|
||||
}
|
||||
bool is_last_subset(const vector<size_t>& subset, size_t set_size) { return subset[0] == set_size - subset.size(); }
|
||||
|
||||
void next_subset(vector<size_t>& inout_subset, size_t set_size)
|
||||
{
|
||||
void next_subset(vector<size_t>& inout_subset, size_t set_size) {
|
||||
if (is_last_subset(inout_subset, set_size)) {
|
||||
cerr << "iterating past the last subset" << endl;
|
||||
abort();
|
||||
@ -444,8 +378,7 @@ void next_subset(vector<size_t>& inout_subset, size_t set_size)
|
||||
const size_t number_of_subsets_limit = 100;
|
||||
const size_t always_search_subsets_of_size_at_least = 2;
|
||||
|
||||
bool is_number_of_subsets_feasible(size_t n, size_t p)
|
||||
{
|
||||
bool is_number_of_subsets_feasible(size_t n, size_t p) {
|
||||
assert(n > 0 && p > 0 && p <= n);
|
||||
uint64_t numerator = 1, denominator = 1;
|
||||
for (size_t i = 0; i < p; i++) {
|
||||
@ -458,8 +391,7 @@ bool is_number_of_subsets_feasible(size_t n, size_t p)
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t max_feasible_subset_size(size_t n)
|
||||
{
|
||||
size_t max_feasible_subset_size(size_t n) {
|
||||
assert(n > 0);
|
||||
const size_t minresult = min<size_t>(n - 1, always_search_subsets_of_size_at_least);
|
||||
for (size_t p = 1; p <= n - 1; p++) {
|
||||
@ -470,12 +402,9 @@ size_t max_feasible_subset_size(size_t n)
|
||||
return n - 1;
|
||||
}
|
||||
|
||||
void find_subset_with_efficiency_higher_than(
|
||||
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
float required_efficiency_to_beat,
|
||||
vector<size_t>& inout_remainder,
|
||||
vector<size_t>& out_subset)
|
||||
{
|
||||
void find_subset_with_efficiency_higher_than(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
float required_efficiency_to_beat, vector<size_t>& inout_remainder,
|
||||
vector<size_t>& out_subset) {
|
||||
out_subset.resize(0);
|
||||
|
||||
if (required_efficiency_to_beat >= 1.0f) {
|
||||
@ -484,7 +413,6 @@ void find_subset_with_efficiency_higher_than(
|
||||
}
|
||||
|
||||
while (!inout_remainder.empty()) {
|
||||
|
||||
vector<size_t> candidate_indices(inout_remainder.size());
|
||||
for (size_t i = 0; i < candidate_indices.size(); i++) {
|
||||
candidate_indices[i] = i;
|
||||
@ -493,16 +421,13 @@ void find_subset_with_efficiency_higher_than(
|
||||
size_t candidate_indices_subset_size = max_feasible_subset_size(candidate_indices.size());
|
||||
while (candidate_indices_subset_size >= 1) {
|
||||
vector<size_t> candidate_indices_subset;
|
||||
make_first_subset(candidate_indices_subset_size,
|
||||
candidate_indices_subset,
|
||||
candidate_indices.size());
|
||||
make_first_subset(candidate_indices_subset_size, candidate_indices_subset, candidate_indices.size());
|
||||
|
||||
vector<size_t> best_candidate_indices_subset;
|
||||
float best_efficiency = 0.0f;
|
||||
vector<size_t> trial_subset = out_subset;
|
||||
trial_subset.resize(out_subset.size() + candidate_indices_subset_size);
|
||||
while (true)
|
||||
{
|
||||
while (true) {
|
||||
for (size_t i = 0; i < candidate_indices_subset_size; i++) {
|
||||
trial_subset[out_subset.size() + i] = inout_remainder[candidate_indices_subset[i]];
|
||||
}
|
||||
@ -542,11 +467,9 @@ void find_subset_with_efficiency_higher_than(
|
||||
}
|
||||
}
|
||||
|
||||
void find_partition_with_efficiency_higher_than(
|
||||
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
void find_partition_with_efficiency_higher_than(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
float required_efficiency_to_beat,
|
||||
vector<vector<size_t>>& out_partition)
|
||||
{
|
||||
vector<vector<size_t>>& out_partition) {
|
||||
out_partition.resize(0);
|
||||
|
||||
vector<size_t> remainder;
|
||||
@ -556,25 +479,19 @@ void find_partition_with_efficiency_higher_than(
|
||||
|
||||
while (!remainder.empty()) {
|
||||
vector<size_t> new_subset;
|
||||
find_subset_with_efficiency_higher_than(
|
||||
preprocessed_inputfiles,
|
||||
required_efficiency_to_beat,
|
||||
remainder,
|
||||
find_subset_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, remainder,
|
||||
new_subset);
|
||||
out_partition.push_back(new_subset);
|
||||
}
|
||||
}
|
||||
|
||||
void print_partition(
|
||||
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<vector<size_t>>& partition)
|
||||
{
|
||||
void print_partition(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
|
||||
const vector<vector<size_t>>& partition) {
|
||||
float efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
|
||||
cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl;
|
||||
for (auto subset = partition.begin(); subset != partition.end(); ++subset) {
|
||||
cout << " Subset " << (subset - partition.begin())
|
||||
<< ", efficiency " << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:"
|
||||
<< endl;
|
||||
cout << " Subset " << (subset - partition.begin()) << ", efficiency "
|
||||
<< efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:" << endl;
|
||||
for (auto file = subset->begin(); file != subset->end(); ++file) {
|
||||
cout << " " << preprocessed_inputfiles[*file].filename << endl;
|
||||
}
|
||||
@ -586,18 +503,18 @@ void print_partition(
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
struct action_t
|
||||
{
|
||||
virtual const char* invokation_name() const { abort(); return nullptr; }
|
||||
struct action_t {
|
||||
virtual const char* invokation_name() const {
|
||||
abort();
|
||||
return nullptr;
|
||||
}
|
||||
virtual void run(const vector<string>&) const { abort(); }
|
||||
virtual ~action_t() {}
|
||||
};
|
||||
|
||||
struct partition_action_t : action_t
|
||||
{
|
||||
struct partition_action_t : action_t {
|
||||
virtual const char* invokation_name() const override { return "partition"; }
|
||||
virtual void run(const vector<string>& input_filenames) const override
|
||||
{
|
||||
virtual void run(const vector<string>& input_filenames) const override {
|
||||
vector<preprocessed_inputfile_t> preprocessed_inputfiles;
|
||||
|
||||
if (input_filenames.empty()) {
|
||||
@ -627,17 +544,12 @@ struct partition_action_t : action_t
|
||||
float required_efficiency_to_beat = 0.0f;
|
||||
vector<vector<vector<size_t>>> partitions;
|
||||
cerr << "searching for partitions...\r" << flush;
|
||||
while (true)
|
||||
{
|
||||
while (true) {
|
||||
vector<vector<size_t>> partition;
|
||||
find_partition_with_efficiency_higher_than(
|
||||
preprocessed_inputfiles,
|
||||
required_efficiency_to_beat,
|
||||
partition);
|
||||
find_partition_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, partition);
|
||||
float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
|
||||
cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size()
|
||||
<< " subsets for " << 100.0f * actual_efficiency
|
||||
<< " % efficiency"
|
||||
cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() << " subsets for "
|
||||
<< 100.0f * actual_efficiency << " % efficiency"
|
||||
<< " \r" << flush;
|
||||
partitions.push_back(partition);
|
||||
if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) {
|
||||
@ -665,8 +577,7 @@ struct partition_action_t : action_t
|
||||
}
|
||||
};
|
||||
|
||||
struct evaluate_defaults_action_t : action_t
|
||||
{
|
||||
struct evaluate_defaults_action_t : action_t {
|
||||
struct results_entry_t {
|
||||
uint16_t product_size;
|
||||
size_triple_t default_block_size;
|
||||
@ -675,30 +586,24 @@ struct evaluate_defaults_action_t : action_t
|
||||
float best_pot_gflops;
|
||||
float default_efficiency;
|
||||
};
|
||||
friend ostream& operator<<(ostream& s, const results_entry_t& entry)
|
||||
{
|
||||
return s
|
||||
<< "Product size " << size_triple_t(entry.product_size)
|
||||
<< ": default block size " << entry.default_block_size
|
||||
<< " -> " << entry.default_gflops
|
||||
friend ostream& operator<<(ostream& s, const results_entry_t& entry) {
|
||||
return s << "Product size " << size_triple_t(entry.product_size) << ": default block size "
|
||||
<< entry.default_block_size << " -> " << entry.default_gflops
|
||||
<< " GFlop/s = " << entry.default_efficiency * 100.0f << " %"
|
||||
<< " of best POT block size " << size_triple_t(entry.best_pot_block_size)
|
||||
<< " -> " << entry.best_pot_gflops
|
||||
<< " GFlop/s" << dec;
|
||||
<< " of best POT block size " << size_triple_t(entry.best_pot_block_size) << " -> "
|
||||
<< entry.best_pot_gflops << " GFlop/s" << dec;
|
||||
}
|
||||
static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
|
||||
return e1.default_efficiency < e2.default_efficiency;
|
||||
}
|
||||
virtual const char* invokation_name() const override { return "evaluate-defaults"; }
|
||||
void show_usage_and_exit() const
|
||||
{
|
||||
void show_usage_and_exit() const {
|
||||
cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
|
||||
cerr << "checks how well the performance with default sizes compares to the best "
|
||||
<< "performance measured over all POT sizes." << endl;
|
||||
exit(1);
|
||||
}
|
||||
virtual void run(const vector<string>& input_filenames) const override
|
||||
{
|
||||
virtual void run(const vector<string>& input_filenames) const override {
|
||||
if (input_filenames.size() != 2) {
|
||||
show_usage_and_exit();
|
||||
}
|
||||
@ -718,16 +623,13 @@ struct evaluate_defaults_action_t : action_t
|
||||
uint16_t product_size = 0;
|
||||
auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin();
|
||||
for (auto it_default_sizes = inputfile_default_sizes.entries.begin();
|
||||
it_default_sizes != inputfile_default_sizes.entries.end();
|
||||
++it_default_sizes)
|
||||
{
|
||||
it_default_sizes != inputfile_default_sizes.entries.end(); ++it_default_sizes) {
|
||||
if (it_default_sizes->product_size == product_size) {
|
||||
continue;
|
||||
}
|
||||
product_size = it_default_sizes->product_size;
|
||||
while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() &&
|
||||
it_all_pot_sizes->product_size != product_size)
|
||||
{
|
||||
it_all_pot_sizes->product_size != product_size) {
|
||||
++it_all_pot_sizes;
|
||||
}
|
||||
if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) {
|
||||
@ -735,10 +637,8 @@ struct evaluate_defaults_action_t : action_t
|
||||
}
|
||||
uint16_t best_pot_block_size = 0;
|
||||
float best_pot_gflops = 0;
|
||||
for (auto it = it_all_pot_sizes;
|
||||
it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
|
||||
++it)
|
||||
{
|
||||
for (auto it = it_all_pot_sizes; it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
|
||||
++it) {
|
||||
if (it->gflops > best_pot_gflops) {
|
||||
best_pot_gflops = it->gflops;
|
||||
best_pot_block_size = it->pot_block_size;
|
||||
@ -787,17 +687,14 @@ struct evaluate_defaults_action_t : action_t
|
||||
for (auto it = a.begin(); it != a.end(); ++it) {
|
||||
size_t n = min(results.size() - 1, size_t(*it * results.size()));
|
||||
cout << (100.0f * n / (results.size() - 1))
|
||||
<< " % of product sizes have default efficiency <= "
|
||||
<< 100.0f * results[n].default_efficiency << " %" << endl;
|
||||
<< " % of product sizes have default efficiency <= " << 100.0f * results[n].default_efficiency << " %"
|
||||
<< endl;
|
||||
}
|
||||
cout.precision(default_precision);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void show_usage_and_exit(int argc, char* argv[],
|
||||
const vector<unique_ptr<action_t>>& available_actions)
|
||||
{
|
||||
void show_usage_and_exit(int argc, char* argv[], const vector<unique_ptr<action_t>>& available_actions) {
|
||||
cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
|
||||
cerr << "available actions:" << endl;
|
||||
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
|
||||
@ -807,8 +704,7 @@ void show_usage_and_exit(int argc, char* argv[],
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int main(int argc, char* argv[]) {
|
||||
cout.precision(default_precision);
|
||||
cerr.precision(default_precision);
|
||||
|
||||
|
@ -3,8 +3,7 @@
|
||||
#include "BenchUtil.h"
|
||||
#include "basicbenchmark.h"
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int main(int argc, char *argv[]) {
|
||||
DISABLE_SSE_EXCEPTIONS();
|
||||
|
||||
// this is the list of matrix type and size we want to bench:
|
||||
@ -12,18 +11,18 @@ int main(int argc, char *argv[])
|
||||
#define MODES ((3d)(3)(4000000))((4d)(4)(1000000))((Xd)(4)(1000000))((Xd)(20)(10000))
|
||||
// #define MODES ((Xd)(20)(10000))
|
||||
|
||||
#define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \
|
||||
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \
|
||||
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / "
|
||||
#define _GENERATE_HEADER(R, ARG, EL) \
|
||||
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) \
|
||||
<< "-" \
|
||||
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / "
|
||||
|
||||
std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES) << endl;
|
||||
|
||||
const int tries = 10;
|
||||
|
||||
#define _RUN_BENCH(R, ARG, EL) \
|
||||
std::cout << ARG( \
|
||||
BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\
|
||||
BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \
|
||||
std::cout << ARG(BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL))(BOOST_PP_SEQ_ELEM(1, EL), BOOST_PP_SEQ_ELEM(1, EL)), \
|
||||
BOOST_PP_SEQ_ELEM(2, EL), tries) \
|
||||
<< " ";
|
||||
|
||||
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES);
|
||||
|
@ -8,24 +8,17 @@ template<int Mode, typename MatrixType>
|
||||
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline));
|
||||
|
||||
template <int Mode, typename MatrixType>
|
||||
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations)
|
||||
{
|
||||
for(int a = 0; a < iterations; a++)
|
||||
{
|
||||
if (Mode==LazyEval)
|
||||
{
|
||||
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) {
|
||||
for (int a = 0; a < iterations; a++) {
|
||||
if (Mode == LazyEval) {
|
||||
asm("#begin_bench_loop LazyEval");
|
||||
if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
|
||||
m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
|
||||
}
|
||||
else if (Mode==OmpEval)
|
||||
{
|
||||
} else if (Mode == OmpEval) {
|
||||
asm("#begin_bench_loop OmpEval");
|
||||
if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
|
||||
m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
asm("#begin_bench_loop EarlyEval");
|
||||
if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
|
||||
m = I + 0.00005 * (m + m * m);
|
||||
@ -38,8 +31,7 @@ template<int Mode, typename MatrixType>
|
||||
double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline));
|
||||
|
||||
template <int Mode, typename MatrixType>
|
||||
double benchBasic(const MatrixType& mat, int iterations, int tries)
|
||||
{
|
||||
double benchBasic(const MatrixType& mat, int iterations, int tries) {
|
||||
const int rows = mat.rows();
|
||||
const int cols = mat.cols();
|
||||
|
||||
@ -49,8 +41,7 @@ double benchBasic(const MatrixType& mat, int iterations, int tries)
|
||||
initMatrix_identity(I);
|
||||
|
||||
Eigen::BenchTimer timer;
|
||||
for(uint t=0; t<tries; ++t)
|
||||
{
|
||||
for (uint t = 0; t < tries; ++t) {
|
||||
initMatrix_random(m);
|
||||
timer.start();
|
||||
benchBasic_loop<Mode>(I, m, iterations);
|
||||
|
@ -25,59 +25,47 @@ typedef double Scalar;
|
||||
#define CBLAS_GEMM cblas_dgemm
|
||||
#endif
|
||||
|
||||
|
||||
typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MyMatrix;
|
||||
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
|
||||
void check_product(int M, int N, int K);
|
||||
void check_product(void);
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int main(int argc, char* argv[]) {
|
||||
// disable SSE exceptions
|
||||
#ifdef __GNUC__
|
||||
{
|
||||
int aux;
|
||||
asm(
|
||||
"stmxcsr %[aux] \n\t"
|
||||
asm("stmxcsr %[aux] \n\t"
|
||||
"orl $32832, %[aux] \n\t"
|
||||
"ldmxcsr %[aux] \n\t"
|
||||
: : [aux] "m" (aux));
|
||||
:
|
||||
: [aux] "m"(aux));
|
||||
}
|
||||
#endif
|
||||
|
||||
int nbtries = 1, nbloops = 1, M, N, K;
|
||||
|
||||
if (argc==2)
|
||||
{
|
||||
if (argc == 2) {
|
||||
if (std::string(argv[1]) == "check")
|
||||
check_product();
|
||||
else
|
||||
M = N = K = atoi(argv[1]);
|
||||
}
|
||||
else if ((argc==3) && (std::string(argv[1])=="auto"))
|
||||
{
|
||||
} else if ((argc == 3) && (std::string(argv[1]) == "auto")) {
|
||||
M = N = K = atoi(argv[2]);
|
||||
nbloops = 1000000000 / (M * M * M);
|
||||
if (nbloops<1)
|
||||
nbloops = 1;
|
||||
if (nbloops < 1) nbloops = 1;
|
||||
nbtries = 6;
|
||||
}
|
||||
else if (argc==4)
|
||||
{
|
||||
} else if (argc == 4) {
|
||||
M = N = K = atoi(argv[1]);
|
||||
nbloops = atoi(argv[2]);
|
||||
nbtries = atoi(argv[3]);
|
||||
}
|
||||
else if (argc==6)
|
||||
{
|
||||
} else if (argc == 6) {
|
||||
M = atoi(argv[1]);
|
||||
N = atoi(argv[2]);
|
||||
K = atoi(argv[3]);
|
||||
nbloops = atoi(argv[4]);
|
||||
nbtries = atoi(argv[5]);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
std::cout << "Usage: " << argv[0] << " size \n";
|
||||
std::cout << "Usage: " << argv[0] << " auto size\n";
|
||||
std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
|
||||
@ -95,8 +83,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
double nbmad = double(M) * double(N) * double(K) * double(nbloops);
|
||||
|
||||
if (!(std::string(argv[1])=="auto"))
|
||||
std::cout << M << " x " << N << " x " << K << "\n";
|
||||
if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n";
|
||||
|
||||
Scalar alpha, beta;
|
||||
MyMatrix ma(M, K), mb(K, N), mc(M, N);
|
||||
@ -112,17 +99,17 @@ int main(int argc, char *argv[])
|
||||
|
||||
// bench cblas
|
||||
// ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
|
||||
if (!(std::string(argv[1])=="auto"))
|
||||
{
|
||||
if (!(std::string(argv[1]) == "auto")) {
|
||||
timer.reset();
|
||||
for (uint k=0 ; k<nbtries ; ++k)
|
||||
{
|
||||
for (uint k = 0; k < nbtries; ++k) {
|
||||
timer.start();
|
||||
for (uint j = 0; j < nbloops; ++j)
|
||||
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
|
||||
CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta,
|
||||
mc.data(), N);
|
||||
#else
|
||||
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
|
||||
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta,
|
||||
mc.data(), M);
|
||||
#endif
|
||||
timer.stop();
|
||||
}
|
||||
@ -141,8 +128,7 @@ int main(int argc, char *argv[])
|
||||
// if (!(std::string(argv[1])=="auto"))
|
||||
{
|
||||
timer.reset();
|
||||
for (uint k=0 ; k<nbtries ; ++k)
|
||||
{
|
||||
for (uint k = 0; k < nbtries; ++k) {
|
||||
timer.start();
|
||||
bench_eigengemm(mc, ma, mb, nbloops);
|
||||
timer.stop();
|
||||
@ -156,23 +142,20 @@ int main(int argc, char *argv[])
|
||||
std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
|
||||
std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
|
||||
{
|
||||
for (uint j=0 ; j<nbloops ; ++j)
|
||||
mc.noalias() += ma * mb;
|
||||
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) {
|
||||
for (uint j = 0; j < nbloops; ++j) mc.noalias() += ma * mb;
|
||||
}
|
||||
|
||||
#define MYVERIFY(A,M) if (!(A)) { \
|
||||
#define MYVERIFY(A, M) \
|
||||
if (!(A)) { \
|
||||
std::cout << "FAIL: " << M << "\n"; \
|
||||
}
|
||||
void check_product(int M, int N, int K)
|
||||
{
|
||||
void check_product(int M, int N, int K) {
|
||||
MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N), mref(M, N);
|
||||
ma = MyMatrix::Random(M, K);
|
||||
mb = MyMatrix::Random(K, N);
|
||||
@ -203,11 +186,9 @@ void check_product(int M, int N, int K)
|
||||
MYVERIFY(meigen.isApprox(mref, eps), ". * T");
|
||||
}
|
||||
|
||||
void check_product(void)
|
||||
{
|
||||
void check_product(void) {
|
||||
int M, N, K;
|
||||
for (uint i=0; i<1000; ++i)
|
||||
{
|
||||
for (uint i = 0; i < 1000; ++i) {
|
||||
M = internal::random<int>(1, 64);
|
||||
N = internal::random<int>(1, 768);
|
||||
K = internal::random<int>(1, 768);
|
||||
@ -216,4 +197,3 @@ void check_product(void)
|
||||
check_product(M, N, K);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -25,14 +25,12 @@ using namespace Eigen;
|
||||
typedef float Scalar;
|
||||
|
||||
template <typename MatrixType>
|
||||
__attribute__ ((noinline)) void benchLLT(const MatrixType& m)
|
||||
{
|
||||
__attribute__((noinline)) void benchLLT(const MatrixType& m) {
|
||||
int rows = m.rows();
|
||||
int cols = m.cols();
|
||||
|
||||
double cost = 0;
|
||||
for (int j=0; j<rows; ++j)
|
||||
{
|
||||
for (int j = 0; j < rows; ++j) {
|
||||
int r = std::max(rows - j - 1, 0);
|
||||
cost += 2 * (r * j + r + j);
|
||||
}
|
||||
@ -50,22 +48,18 @@ __attribute__ ((noinline)) void benchLLT(const MatrixType& m)
|
||||
Scalar acc = 0;
|
||||
int r = internal::random<int>(0, covMat.rows() - 1);
|
||||
int c = internal::random<int>(0, covMat.cols() - 1);
|
||||
for (int t=0; t<TRIES; ++t)
|
||||
{
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerNoSqrt.start();
|
||||
for (int k=0; k<repeats; ++k)
|
||||
{
|
||||
for (int k = 0; k < repeats; ++k) {
|
||||
LDLT<SquareMatrixType> cholnosqrt(covMat);
|
||||
acc += cholnosqrt.matrixL().coeff(r, c);
|
||||
}
|
||||
timerNoSqrt.stop();
|
||||
}
|
||||
|
||||
for (int t=0; t<TRIES; ++t)
|
||||
{
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerSqrt.start();
|
||||
for (int k=0; k<repeats; ++k)
|
||||
{
|
||||
for (int k = 0; k < repeats; ++k) {
|
||||
LLT<SquareMatrixType> chol(covMat);
|
||||
acc += chol.matrixL().coeff(r, c);
|
||||
}
|
||||
@ -76,27 +70,21 @@ __attribute__ ((noinline)) void benchLLT(const MatrixType& m)
|
||||
std::cout << "dyn ";
|
||||
else
|
||||
std::cout << "fixed ";
|
||||
std::cout << covMat.rows() << " \t"
|
||||
<< (timerNoSqrt.best()) / repeats << "s "
|
||||
<< "(" << 1e-9 * cost*repeats/timerNoSqrt.best() << " GFLOPS)\t"
|
||||
<< (timerSqrt.best()) / repeats << "s "
|
||||
std::cout << covMat.rows() << " \t" << (timerNoSqrt.best()) / repeats << "s "
|
||||
<< "(" << 1e-9 * cost * repeats / timerNoSqrt.best() << " GFLOPS)\t" << (timerSqrt.best()) / repeats << "s "
|
||||
<< "(" << 1e-9 * cost * repeats / timerSqrt.best() << " GFLOPS)\n";
|
||||
|
||||
|
||||
#ifdef BENCH_GSL
|
||||
if (MatrixType::RowsAtCompileTime==Dynamic)
|
||||
{
|
||||
if (MatrixType::RowsAtCompileTime == Dynamic) {
|
||||
timerSqrt.reset();
|
||||
|
||||
gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols());
|
||||
gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols());
|
||||
|
||||
eiToGsl(covMat, &gslCovMat);
|
||||
for (int t=0; t<TRIES; ++t)
|
||||
{
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerSqrt.start();
|
||||
for (int k=0; k<repeats; ++k)
|
||||
{
|
||||
for (int k = 0; k < repeats; ++k) {
|
||||
gsl_matrix_memcpy(gslCopy, gslCovMat);
|
||||
gsl_linalg_cholesky_decomp(gslCopy);
|
||||
acc += gsl_matrix_get(gslCopy, r, c);
|
||||
@ -104,28 +92,24 @@ __attribute__ ((noinline)) void benchLLT(const MatrixType& m)
|
||||
timerSqrt.stop();
|
||||
}
|
||||
|
||||
std::cout << " | \t"
|
||||
<< timerSqrt.value() * REPEAT / repeats << "s";
|
||||
std::cout << " | \t" << timerSqrt.value() * REPEAT / repeats << "s";
|
||||
|
||||
gsl_matrix_free(gslCovMat);
|
||||
}
|
||||
#endif
|
||||
std::cout << "\n";
|
||||
// make sure the compiler does not optimize too much
|
||||
if (acc==123)
|
||||
std::cout << acc;
|
||||
if (acc == 123) std::cout << acc;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int main(int argc, char* argv[]) {
|
||||
const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 1500, 0};
|
||||
std::cout << "size LDLT LLT";
|
||||
// #ifdef BENCH_GSL
|
||||
// std::cout << " GSL (standard + double + ATLAS) ";
|
||||
// #endif
|
||||
std::cout << "\n";
|
||||
for (int i=0; dynsizes[i]>0; ++i)
|
||||
benchLLT(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
|
||||
for (int i = 0; dynsizes[i] > 0; ++i) benchLLT(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
|
||||
|
||||
benchLLT(Matrix<Scalar, 2, 2>());
|
||||
benchLLT(Matrix<Scalar, 3, 3>());
|
||||
@ -138,4 +122,3 @@ int main(int argc, char* argv[])
|
||||
benchLLT(Matrix<Scalar, 16, 16>());
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -31,8 +31,7 @@ using namespace Eigen;
|
||||
typedef SCALAR Scalar;
|
||||
|
||||
template <typename MatrixType>
|
||||
__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
|
||||
{
|
||||
__attribute__((noinline)) void benchEigenSolver(const MatrixType& m) {
|
||||
int rows = m.rows();
|
||||
int cols = m.cols();
|
||||
|
||||
@ -52,11 +51,9 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
|
||||
int c = internal::random<int>(0, covMat.cols() - 1);
|
||||
{
|
||||
SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
|
||||
for (int t=0; t<TRIES; ++t)
|
||||
{
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerSa.start();
|
||||
for (int k=0; k<saRepeats; ++k)
|
||||
{
|
||||
for (int k = 0; k < saRepeats; ++k) {
|
||||
ei.compute(covMat);
|
||||
acc += ei.eigenvectors().coeff(r, c);
|
||||
}
|
||||
@ -66,11 +63,9 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
|
||||
|
||||
{
|
||||
EigenSolver<SquareMatrixType> ei(covMat);
|
||||
for (int t=0; t<TRIES; ++t)
|
||||
{
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerStd.start();
|
||||
for (int k=0; k<stdRepeats; ++k)
|
||||
{
|
||||
for (int k = 0; k < stdRepeats; ++k) {
|
||||
ei.compute(covMat);
|
||||
acc += ei.eigenvectors().coeff(r, c);
|
||||
}
|
||||
@ -82,13 +77,11 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
|
||||
std::cout << "dyn ";
|
||||
else
|
||||
std::cout << "fixed ";
|
||||
std::cout << covMat.rows() << " \t"
|
||||
<< timerSa.value() * REPEAT / saRepeats << "s \t"
|
||||
std::cout << covMat.rows() << " \t" << timerSa.value() * REPEAT / saRepeats << "s \t"
|
||||
<< timerStd.value() * REPEAT / stdRepeats << "s";
|
||||
|
||||
#ifdef BENCH_GMM
|
||||
if (MatrixType::RowsAtCompileTime==Dynamic)
|
||||
{
|
||||
if (MatrixType::RowsAtCompileTime == Dynamic) {
|
||||
timerSa.reset();
|
||||
timerStd.reset();
|
||||
|
||||
@ -96,11 +89,9 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
|
||||
gmm::dense_matrix<Scalar> eigvect(covMat.rows(), covMat.cols());
|
||||
std::vector<Scalar> eigval(covMat.rows());
|
||||
eiToGmm(covMat, gmmCovMat);
|
||||
for (int t=0; t<TRIES; ++t)
|
||||
{
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerSa.start();
|
||||
for (int k=0; k<saRepeats; ++k)
|
||||
{
|
||||
for (int k = 0; k < saRepeats; ++k) {
|
||||
gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
|
||||
acc += eigvect(r, c);
|
||||
}
|
||||
@ -118,15 +109,13 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
|
||||
// timerStd.stop();
|
||||
// }
|
||||
|
||||
std::cout << " | \t"
|
||||
<< timerSa.value() * REPEAT / saRepeats << "s"
|
||||
std::cout << " | \t" << timerSa.value() * REPEAT / saRepeats << "s"
|
||||
<< /*timerStd.value() * REPEAT / stdRepeats << "s"*/ " na ";
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BENCH_GSL
|
||||
if (MatrixType::RowsAtCompileTime==Dynamic)
|
||||
{
|
||||
if (MatrixType::RowsAtCompileTime == Dynamic) {
|
||||
timerSa.reset();
|
||||
timerStd.reset();
|
||||
|
||||
@ -141,22 +130,18 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
|
||||
gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());
|
||||
|
||||
eiToGsl(covMat, &gslCovMat);
|
||||
for (int t=0; t<TRIES; ++t)
|
||||
{
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerSa.start();
|
||||
for (int k=0; k<saRepeats; ++k)
|
||||
{
|
||||
for (int k = 0; k < saRepeats; ++k) {
|
||||
gsl_matrix_memcpy(gslCopy, gslCovMat);
|
||||
gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
|
||||
acc += gsl_matrix_get(eigvect, r, c);
|
||||
}
|
||||
timerSa.stop();
|
||||
}
|
||||
for (int t=0; t<TRIES; ++t)
|
||||
{
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerStd.start();
|
||||
for (int k=0; k<stdRepeats; ++k)
|
||||
{
|
||||
for (int k = 0; k < stdRepeats; ++k) {
|
||||
gsl_matrix_memcpy(gslCopy, gslCovMat);
|
||||
gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
|
||||
acc += GSL_REAL(gsl_matrix_complex_get(eigvectz, r, c));
|
||||
@ -164,9 +149,8 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
|
||||
timerStd.stop();
|
||||
}
|
||||
|
||||
std::cout << " | \t"
|
||||
<< timerSa.value() * REPEAT / saRepeats << "s \t"
|
||||
<< timerStd.value() * REPEAT / stdRepeats << "s";
|
||||
std::cout << " | \t" << timerSa.value() * REPEAT / saRepeats << "s \t" << timerStd.value() * REPEAT / stdRepeats
|
||||
<< "s";
|
||||
|
||||
gsl_matrix_free(gslCovMat);
|
||||
gsl_vector_free(gslCopy);
|
||||
@ -182,12 +166,10 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
|
||||
std::cout << "\n";
|
||||
|
||||
// make sure the compiler does not optimize too much
|
||||
if (acc==123)
|
||||
std::cout << acc;
|
||||
if (acc == 123) std::cout << acc;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int main(int argc, char* argv[]) {
|
||||
const int dynsizes[] = {4, 6, 8, 12, 16, 24, 32, 64, 128, 256, 512, 0};
|
||||
std::cout << "size selfadjoint generic";
|
||||
#ifdef BENCH_GMM
|
||||
@ -197,8 +179,7 @@ int main(int argc, char* argv[])
|
||||
std::cout << " GSL (double + ATLAS) ";
|
||||
#endif
|
||||
std::cout << "\n";
|
||||
for (uint i=0; dynsizes[i]>0; ++i)
|
||||
benchEigenSolver(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
|
||||
for (uint i = 0; dynsizes[i] > 0; ++i) benchEigenSolver(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
|
||||
|
||||
benchEigenSolver(Matrix<Scalar, 2, 2>());
|
||||
benchEigenSolver(Matrix<Scalar, 3, 3>());
|
||||
@ -209,4 +190,3 @@ int main(int argc, char* argv[])
|
||||
benchEigenSolver(Matrix<Scalar, 16, 16>());
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -19,13 +19,21 @@
|
||||
using namespace Eigen;
|
||||
using namespace std;
|
||||
|
||||
|
||||
template <typename T>
|
||||
string nameof();
|
||||
|
||||
template <> string nameof<float>() {return "float";}
|
||||
template <> string nameof<double>() {return "double";}
|
||||
template <> string nameof<long double>() {return "long double";}
|
||||
template <>
|
||||
string nameof<float>() {
|
||||
return "float";
|
||||
}
|
||||
template <>
|
||||
string nameof<double>() {
|
||||
return "double";
|
||||
}
|
||||
template <>
|
||||
string nameof<long double>() {
|
||||
return "long double";
|
||||
}
|
||||
|
||||
#ifndef TYPE
|
||||
#define TYPE float
|
||||
@ -41,8 +49,7 @@ template <> string nameof<long double>() {return "long double";}
|
||||
using namespace Eigen;
|
||||
|
||||
template <typename T>
|
||||
void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false)
|
||||
{
|
||||
void bench(int nfft, bool fwd, bool unscaled = false, bool halfspec = false) {
|
||||
typedef typename NumTraits<T>::Real Scalar;
|
||||
typedef typename std::complex<Scalar> Complex;
|
||||
int nits = NDATA / nfft;
|
||||
@ -59,7 +66,6 @@ void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false)
|
||||
cout << "halfspec ";
|
||||
}
|
||||
|
||||
|
||||
std::fill(inbuf.begin(), inbuf.end(), 0);
|
||||
fft.fwd(outbuf, inbuf);
|
||||
|
||||
@ -68,11 +74,9 @@ void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false)
|
||||
for (int k = 0; k < 8; ++k) {
|
||||
timer.start();
|
||||
if (fwd)
|
||||
for(int i = 0; i < nits; i++)
|
||||
fft.fwd( outbuf , inbuf);
|
||||
for (int i = 0; i < nits; i++) fft.fwd(outbuf, inbuf);
|
||||
else
|
||||
for(int i = 0; i < nits; i++)
|
||||
fft.inv(inbuf,outbuf);
|
||||
for (int i = 0; i < nits; i++) fft.inv(inbuf, outbuf);
|
||||
timer.stop();
|
||||
}
|
||||
|
||||
@ -85,7 +89,6 @@ void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false)
|
||||
mflops /= 2;
|
||||
}
|
||||
|
||||
|
||||
if (fwd)
|
||||
cout << " fwd";
|
||||
else
|
||||
@ -94,8 +97,7 @@ void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false)
|
||||
cout << " NFFT=" << nfft << " " << (double(1e-6 * nfft * nits) / timer.value()) << " MS/s " << mflops << "MFLOPS\n";
|
||||
}
|
||||
|
||||
int main(int argc,char ** argv)
|
||||
{
|
||||
int main(int argc, char** argv) {
|
||||
bench<complex<float> >(NFFT, true);
|
||||
bench<complex<float> >(NFFT, false);
|
||||
bench<float>(NFFT, true);
|
||||
|
@ -11,52 +11,42 @@ using namespace std;
|
||||
#define REPEAT 1000000
|
||||
#endif
|
||||
|
||||
enum func_opt
|
||||
{
|
||||
enum func_opt {
|
||||
TV,
|
||||
TMATV,
|
||||
TMATVMAT,
|
||||
};
|
||||
|
||||
|
||||
template <class res, class arg1, class arg2, int opt>
|
||||
struct func;
|
||||
|
||||
template <class res, class arg1, class arg2>
|
||||
struct func<res, arg1, arg2, TV>
|
||||
{
|
||||
static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
|
||||
{
|
||||
struct func<res, arg1, arg2, TV> {
|
||||
static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
|
||||
asm("");
|
||||
return a1 * a2;
|
||||
}
|
||||
};
|
||||
|
||||
template <class res, class arg1, class arg2>
|
||||
struct func<res, arg1, arg2, TMATV>
|
||||
{
|
||||
static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
|
||||
{
|
||||
struct func<res, arg1, arg2, TMATV> {
|
||||
static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
|
||||
asm("");
|
||||
return a1.matrix() * a2;
|
||||
}
|
||||
};
|
||||
|
||||
template <class res, class arg1, class arg2>
|
||||
struct func<res, arg1, arg2, TMATVMAT>
|
||||
{
|
||||
static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
|
||||
{
|
||||
struct func<res, arg1, arg2, TMATVMAT> {
|
||||
static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
|
||||
asm("");
|
||||
return res(a1.matrix() * a2.matrix());
|
||||
}
|
||||
};
|
||||
|
||||
template <class func, class arg1, class arg2>
|
||||
struct test_transform
|
||||
{
|
||||
static void run()
|
||||
{
|
||||
struct test_transform {
|
||||
static void run() {
|
||||
arg1 a1;
|
||||
a1.setIdentity();
|
||||
arg2 a2;
|
||||
@ -64,18 +54,16 @@ struct test_transform
|
||||
|
||||
BenchTimer timer;
|
||||
timer.reset();
|
||||
for (int k=0; k<10; ++k)
|
||||
{
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
timer.start();
|
||||
for (int k=0; k<REPEAT; ++k)
|
||||
a2 = func::run( a1, a2 );
|
||||
for (int k = 0; k < REPEAT; ++k) a2 = func::run(a1, a2);
|
||||
timer.stop();
|
||||
}
|
||||
cout << setprecision(4) << fixed << timer.value() << "s " << endl;;
|
||||
cout << setprecision(4) << fixed << timer.value() << "s " << endl;
|
||||
;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#define run_vec(op, scalar, mode, option, vsize) \
|
||||
std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize " "; \
|
||||
{ \
|
||||
@ -93,8 +81,7 @@ struct test_transform
|
||||
test_transform<Func, Trans, Trans>::run(); \
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int main(int argc, char* argv[]) {
|
||||
cout << "vec = trans * vec" << endl;
|
||||
run_vec(TV, float, Isometry, AutoAlign, 3);
|
||||
run_vec(TV, float, Isometry, DontAlign, 3);
|
||||
@ -131,4 +118,3 @@ int main(int argc, char* argv[])
|
||||
run_trans(TMATVMAT, double, Isometry, AutoAlign);
|
||||
run_trans(TMATVMAT, double, Isometry, DontAlign);
|
||||
}
|
||||
|
||||
|
@ -18,46 +18,42 @@ __attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int si
|
||||
__attribute__((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
|
||||
__attribute__((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int main(int argc, char* argv[]) {
|
||||
int size = SIZE * 8;
|
||||
int size2 = size * size;
|
||||
Scalar* a = internal::aligned_new<Scalar>(size2);
|
||||
Scalar* b = internal::aligned_new<Scalar>(size2 + 4) + 1;
|
||||
Scalar* c = internal::aligned_new<Scalar>(size2);
|
||||
|
||||
for (int i=0; i<size; ++i)
|
||||
{
|
||||
for (int i = 0; i < size; ++i) {
|
||||
a[i] = b[i] = c[i] = 0;
|
||||
}
|
||||
|
||||
BenchTimer timer;
|
||||
|
||||
timer.reset();
|
||||
for (int k=0; k<10; ++k)
|
||||
{
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
timer.start();
|
||||
benchVec(a, b, c, size2);
|
||||
timer.stop();
|
||||
}
|
||||
std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
|
||||
std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
|
||||
<< " GFlops\n";
|
||||
return 0;
|
||||
for (int innersize = size; innersize>2 ; --innersize)
|
||||
{
|
||||
if (size2%innersize==0)
|
||||
{
|
||||
for (int innersize = size; innersize > 2; --innersize) {
|
||||
if (size2 % innersize == 0) {
|
||||
int outersize = size2 / innersize;
|
||||
MatrixXf ma = Map<MatrixXf>(a, innersize, outersize);
|
||||
MatrixXf mb = Map<MatrixXf>(b, innersize, outersize);
|
||||
MatrixXf mc = Map<MatrixXf>(c, innersize, outersize);
|
||||
timer.reset();
|
||||
for (int k=0; k<3; ++k)
|
||||
{
|
||||
for (int k = 0; k < 3; ++k) {
|
||||
timer.start();
|
||||
benchVec(ma, mb, mc);
|
||||
timer.stop();
|
||||
}
|
||||
std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
|
||||
std::cout << innersize << " x " << outersize << " " << timer.value() << "s "
|
||||
<< (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) << " GFlops\n";
|
||||
}
|
||||
}
|
||||
|
||||
@ -65,37 +61,31 @@ int main(int argc, char* argv[])
|
||||
VectorXf vb = Map<VectorXf>(b, size2);
|
||||
VectorXf vc = Map<VectorXf>(c, size2);
|
||||
timer.reset();
|
||||
for (int k=0; k<3; ++k)
|
||||
{
|
||||
for (int k = 0; k < 3; ++k) {
|
||||
timer.start();
|
||||
benchVec(va, vb, vc);
|
||||
timer.stop();
|
||||
}
|
||||
std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
|
||||
std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
|
||||
<< " GFlops\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
|
||||
{
|
||||
for (int k=0; k<REPEAT; ++k)
|
||||
a = a + b;
|
||||
void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) {
|
||||
for (int k = 0; k < REPEAT; ++k) a = a + b;
|
||||
}
|
||||
|
||||
void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
|
||||
{
|
||||
for (int k=0; k<REPEAT; ++k)
|
||||
a = a + b;
|
||||
void benchVec(VectorXf& a, VectorXf& b, VectorXf& c) {
|
||||
for (int k = 0; k < REPEAT; ++k) a = a + b;
|
||||
}
|
||||
|
||||
void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
|
||||
{
|
||||
void benchVec(Scalar* a, Scalar* b, Scalar* c, int size) {
|
||||
typedef internal::packet_traits<Scalar>::type PacketScalar;
|
||||
const int PacketSize = internal::packet_traits<Scalar>::size;
|
||||
PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
|
||||
for (int k = 0; k < REPEAT; ++k)
|
||||
for (int i=0; i<size; i+=PacketSize*8)
|
||||
{
|
||||
for (int i = 0; i < size; i += PacketSize * 8) {
|
||||
// a0 = internal::pload(&a[i]);
|
||||
// b0 = internal::pload(&b[i]);
|
||||
// a1 = internal::pload(&a[i+1*PacketSize]);
|
||||
@ -125,11 +115,17 @@ void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
|
||||
// internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
|
||||
// internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
|
||||
|
||||
internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize])));
|
||||
internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize])));
|
||||
internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize])));
|
||||
internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize])));
|
||||
internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize])));
|
||||
internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize])));
|
||||
internal::pstore(&a[i + 2 * PacketSize], internal::padd(internal::ploadu(&a[i + 2 * PacketSize]),
|
||||
internal::ploadu(&b[i + 2 * PacketSize])));
|
||||
internal::pstore(&a[i + 3 * PacketSize], internal::padd(internal::ploadu(&a[i + 3 * PacketSize]),
|
||||
internal::ploadu(&b[i + 3 * PacketSize])));
|
||||
internal::pstore(&a[i + 4 * PacketSize], internal::padd(internal::ploadu(&a[i + 4 * PacketSize]),
|
||||
internal::ploadu(&b[i + 4 * PacketSize])));
|
||||
internal::pstore(&a[i + 5 * PacketSize], internal::padd(internal::ploadu(&a[i + 5 * PacketSize]),
|
||||
internal::ploadu(&b[i + 5 * PacketSize])));
|
||||
internal::pstore(&a[i + 6 * PacketSize], internal::padd(internal::ploadu(&a[i + 6 * PacketSize]),
|
||||
internal::ploadu(&b[i + 6 * PacketSize])));
|
||||
internal::pstore(&a[i + 7 * PacketSize], internal::padd(internal::ploadu(&a[i + 7 * PacketSize]),
|
||||
internal::ploadu(&b[i + 7 * PacketSize])));
|
||||
}
|
||||
}
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include <bench/BenchTimer.h>
|
||||
#include <Eigen/Core>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace Eigen;
|
||||
|
||||
@ -84,59 +83,60 @@ const char transB = notrans;
|
||||
#endif
|
||||
|
||||
template <typename A, typename B>
|
||||
void blas_gemm(const A& a, const B& b, MatrixXf& c)
|
||||
{
|
||||
int M = c.rows(); int N = c.cols(); int K = a.cols();
|
||||
int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
|
||||
void blas_gemm(const A& a, const B& b, MatrixXf& c) {
|
||||
int M = c.rows();
|
||||
int N = c.cols();
|
||||
int K = a.cols();
|
||||
int lda = a.outerStride();
|
||||
int ldb = b.outerStride();
|
||||
int ldc = c.rows();
|
||||
|
||||
sgemm_(&transA,&transB,&M,&N,&K,&fone,
|
||||
const_cast<float*>(a.data()),&lda,
|
||||
const_cast<float*>(b.data()),&ldb,&fone,
|
||||
c.data(),&ldc);
|
||||
sgemm_(&transA, &transB, &M, &N, &K, &fone, const_cast<float*>(a.data()), &lda, const_cast<float*>(b.data()), &ldb,
|
||||
&fone, c.data(), &ldc);
|
||||
}
|
||||
|
||||
template <typename A, typename B>
|
||||
void blas_gemm(const A& a, const B& b, MatrixXd& c)
|
||||
{
|
||||
int M = c.rows(); int N = c.cols(); int K = a.cols();
|
||||
int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
|
||||
void blas_gemm(const A& a, const B& b, MatrixXd& c) {
|
||||
int M = c.rows();
|
||||
int N = c.cols();
|
||||
int K = a.cols();
|
||||
int lda = a.outerStride();
|
||||
int ldb = b.outerStride();
|
||||
int ldc = c.rows();
|
||||
|
||||
dgemm_(&transA,&transB,&M,&N,&K,&done,
|
||||
const_cast<double*>(a.data()),&lda,
|
||||
const_cast<double*>(b.data()),&ldb,&done,
|
||||
c.data(),&ldc);
|
||||
dgemm_(&transA, &transB, &M, &N, &K, &done, const_cast<double*>(a.data()), &lda, const_cast<double*>(b.data()), &ldb,
|
||||
&done, c.data(), &ldc);
|
||||
}
|
||||
|
||||
template <typename A, typename B>
|
||||
void blas_gemm(const A& a, const B& b, MatrixXcf& c)
|
||||
{
|
||||
int M = c.rows(); int N = c.cols(); int K = a.cols();
|
||||
int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
|
||||
void blas_gemm(const A& a, const B& b, MatrixXcf& c) {
|
||||
int M = c.rows();
|
||||
int N = c.cols();
|
||||
int K = a.cols();
|
||||
int lda = a.outerStride();
|
||||
int ldb = b.outerStride();
|
||||
int ldc = c.rows();
|
||||
|
||||
cgemm_(&transA,&transB,&M,&N,&K,(float*)&cfone,
|
||||
const_cast<float*>((const float*)a.data()),&lda,
|
||||
const_cast<float*>((const float*)b.data()),&ldb,(float*)&cfone,
|
||||
(float*)c.data(),&ldc);
|
||||
cgemm_(&transA, &transB, &M, &N, &K, (float*)&cfone, const_cast<float*>((const float*)a.data()), &lda,
|
||||
const_cast<float*>((const float*)b.data()), &ldb, (float*)&cfone, (float*)c.data(), &ldc);
|
||||
}
|
||||
|
||||
template <typename A, typename B>
|
||||
void blas_gemm(const A& a, const B& b, MatrixXcd& c)
|
||||
{
|
||||
int M = c.rows(); int N = c.cols(); int K = a.cols();
|
||||
int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
|
||||
void blas_gemm(const A& a, const B& b, MatrixXcd& c) {
|
||||
int M = c.rows();
|
||||
int N = c.cols();
|
||||
int K = a.cols();
|
||||
int lda = a.outerStride();
|
||||
int ldb = b.outerStride();
|
||||
int ldc = c.rows();
|
||||
|
||||
zgemm_(&transA,&transB,&M,&N,&K,(double*)&cdone,
|
||||
const_cast<double*>((const double*)a.data()),&lda,
|
||||
const_cast<double*>((const double*)b.data()),&ldb,(double*)&cdone,
|
||||
(double*)c.data(),&ldc);
|
||||
zgemm_(&transA, &transB, &M, &N, &K, (double*)&cdone, const_cast<double*>((const double*)a.data()), &lda,
|
||||
const_cast<double*>((const double*)b.data()), &ldb, (double*)&cdone, (double*)c.data(), &ldc);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci)
|
||||
{
|
||||
void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci) {
|
||||
cr.noalias() += ar * br;
|
||||
cr.noalias() -= ai * bi;
|
||||
ci.noalias() += ar * bi;
|
||||
@ -144,28 +144,22 @@ void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr,
|
||||
// [cr ci] += [ar ai] * br + [-ai ar] * bi
|
||||
}
|
||||
|
||||
void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci)
|
||||
{
|
||||
void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci) {
|
||||
cr.noalias() += a * br;
|
||||
ci.noalias() += a * bi;
|
||||
}
|
||||
|
||||
void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci)
|
||||
{
|
||||
void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci) {
|
||||
cr.noalias() += ar * b;
|
||||
ci.noalias() += ai * b;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template <typename A, typename B, typename C>
|
||||
EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c)
|
||||
{
|
||||
EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) {
|
||||
c.noalias() += a * b;
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
int main(int argc, char** argv) {
|
||||
std::ptrdiff_t l1 = internal::queryL1CacheSize();
|
||||
std::ptrdiff_t l2 = internal::queryTopLevelCacheSize();
|
||||
std::cout << "L1 cache size = " << (l1 > 0 ? l1 / 1024 : -1) << " KB\n";
|
||||
@ -183,52 +177,37 @@ int main(int argc, char ** argv)
|
||||
int cache_size1 = -1, cache_size2 = l2, cache_size3 = 0;
|
||||
|
||||
bool need_help = false;
|
||||
for (int i=1; i<argc;)
|
||||
{
|
||||
if(argv[i][0]=='-')
|
||||
{
|
||||
if(argv[i][1]=='s')
|
||||
{
|
||||
for (int i = 1; i < argc;) {
|
||||
if (argv[i][0] == '-') {
|
||||
if (argv[i][1] == 's') {
|
||||
++i;
|
||||
s = atoi(argv[i++]);
|
||||
m = n = p = s;
|
||||
if(argv[i][0]!='-')
|
||||
{
|
||||
if (argv[i][0] != '-') {
|
||||
n = atoi(argv[i++]);
|
||||
p = atoi(argv[i++]);
|
||||
}
|
||||
}
|
||||
else if(argv[i][1]=='c')
|
||||
{
|
||||
} else if (argv[i][1] == 'c') {
|
||||
++i;
|
||||
cache_size1 = atoi(argv[i++]);
|
||||
if(argv[i][0]!='-')
|
||||
{
|
||||
if (argv[i][0] != '-') {
|
||||
cache_size2 = atoi(argv[i++]);
|
||||
if(argv[i][0]!='-')
|
||||
cache_size3 = atoi(argv[i++]);
|
||||
if (argv[i][0] != '-') cache_size3 = atoi(argv[i++]);
|
||||
}
|
||||
}
|
||||
else if(argv[i][1]=='t')
|
||||
{
|
||||
} else if (argv[i][1] == 't') {
|
||||
tries = atoi(argv[++i]);
|
||||
++i;
|
||||
}
|
||||
else if(argv[i][1]=='p')
|
||||
{
|
||||
} else if (argv[i][1] == 'p') {
|
||||
++i;
|
||||
rep = atoi(argv[i++]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
need_help = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(need_help)
|
||||
{
|
||||
if (need_help) {
|
||||
std::cout << argv[0] << " -s <matrix sizes> -c <cache sizes> -t <nb tries> -p <nb repeats>\n";
|
||||
std::cout << " <matrix sizes> : size\n";
|
||||
std::cout << " <matrix sizes> : rows columns depth\n";
|
||||
@ -236,13 +215,15 @@ int main(int argc, char ** argv)
|
||||
}
|
||||
|
||||
#if EIGEN_VERSION_AT_LEAST(3, 2, 90)
|
||||
if(cache_size1>0)
|
||||
setCpuCacheSizes(cache_size1,cache_size2,cache_size3);
|
||||
if (cache_size1 > 0) setCpuCacheSizes(cache_size1, cache_size2, cache_size3);
|
||||
#endif
|
||||
|
||||
A a(m,p); a.setRandom();
|
||||
B b(p,n); b.setRandom();
|
||||
C c(m,n); c.setOnes();
|
||||
A a(m, p);
|
||||
a.setRandom();
|
||||
B b(p, n);
|
||||
b.setRandom();
|
||||
C c(m, n);
|
||||
c.setOnes();
|
||||
C rc = c;
|
||||
|
||||
std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n";
|
||||
@ -256,8 +237,7 @@ int main(int argc, char ** argv)
|
||||
#if defined EIGEN_HAS_OPENMP
|
||||
Eigen::initParallel();
|
||||
int procs = omp_get_max_threads();
|
||||
if(procs>1)
|
||||
{
|
||||
if (procs > 1) {
|
||||
#ifdef HAVE_BLAS
|
||||
blas_gemm(a, b, r);
|
||||
#else
|
||||
@ -276,8 +256,7 @@ int main(int argc, char ** argv)
|
||||
std::cerr << "Warning, your product is crap!\n\n";
|
||||
}
|
||||
#else
|
||||
if(1.*m*n*p<2000.*2000*2000)
|
||||
{
|
||||
if (1. * m * n * p < 2000. * 2000 * 2000) {
|
||||
gemm(a, b, c);
|
||||
r.noalias() += a.cast<Scalar>().lazyProduct(b.cast<Scalar>());
|
||||
if (!r.isApprox(c)) {
|
||||
@ -291,8 +270,12 @@ int main(int argc, char ** argv)
|
||||
BenchTimer tblas;
|
||||
c = rc;
|
||||
BENCH(tblas, tries, rep, blas_gemm(a, b, c));
|
||||
std::cout << "blas cpu " << tblas.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER) << "s)\n";
|
||||
std::cout << "blas real " << tblas.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n";
|
||||
std::cout << "blas cpu " << tblas.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tblas.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "blas real " << tblas.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tblas.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
#endif
|
||||
|
||||
// warm start
|
||||
@ -301,75 +284,110 @@ int main(int argc, char ** argv)
|
||||
BenchTimer tmt;
|
||||
c = rc;
|
||||
BENCH(tmt, tries, rep, gemm(a, b, c));
|
||||
std::cout << "eigen cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n";
|
||||
std::cout << "eigen real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
|
||||
std::cout << "eigen cpu " << tmt.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "eigen real " << tmt.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
|
||||
#ifdef EIGEN_HAS_OPENMP
|
||||
if(procs>1)
|
||||
{
|
||||
if (procs > 1) {
|
||||
BenchTimer tmono;
|
||||
omp_set_num_threads(1);
|
||||
Eigen::setNbThreads(1);
|
||||
c = rc;
|
||||
BENCH(tmono, tries, rep, gemm(a, b, c));
|
||||
std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n";
|
||||
std::cout << "eigen mono real " << tmono.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n";
|
||||
std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n";
|
||||
std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmono.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "eigen mono real " << tmono.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmono.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t("
|
||||
<< tmono.total(REAL_TIMER) << "s)\n";
|
||||
std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => "
|
||||
<< (100.0 * tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)) / procs << "%\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
if(1.*m*n*p<30*30*30)
|
||||
{
|
||||
if (1. * m * n * p < 30 * 30 * 30) {
|
||||
BenchTimer tmt;
|
||||
c = rc;
|
||||
BENCH(tmt, tries, rep, c.noalias() += a.lazyProduct(b));
|
||||
std::cout << "lazy cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n";
|
||||
std::cout << "lazy real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
|
||||
std::cout << "lazy cpu " << tmt.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "lazy real " << tmt.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
}
|
||||
|
||||
#ifdef DECOUPLED
|
||||
if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
|
||||
{
|
||||
M ar(m,p); ar.setRandom();
|
||||
M ai(m,p); ai.setRandom();
|
||||
M br(p,n); br.setRandom();
|
||||
M bi(p,n); bi.setRandom();
|
||||
M cr(m,n); cr.setRandom();
|
||||
M ci(m,n); ci.setRandom();
|
||||
if ((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) {
|
||||
M ar(m, p);
|
||||
ar.setRandom();
|
||||
M ai(m, p);
|
||||
ai.setRandom();
|
||||
M br(p, n);
|
||||
br.setRandom();
|
||||
M bi(p, n);
|
||||
bi.setRandom();
|
||||
M cr(m, n);
|
||||
cr.setRandom();
|
||||
M ci(m, n);
|
||||
ci.setRandom();
|
||||
|
||||
BenchTimer t;
|
||||
BENCH(t, tries, rep, matlab_cplx_cplx(ar, ai, br, bi, cr, ci));
|
||||
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n";
|
||||
std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
|
||||
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
}
|
||||
if((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
|
||||
{
|
||||
M a(m,p); a.setRandom();
|
||||
M br(p,n); br.setRandom();
|
||||
M bi(p,n); bi.setRandom();
|
||||
M cr(m,n); cr.setRandom();
|
||||
M ci(m,n); ci.setRandom();
|
||||
if ((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) {
|
||||
M a(m, p);
|
||||
a.setRandom();
|
||||
M br(p, n);
|
||||
br.setRandom();
|
||||
M bi(p, n);
|
||||
bi.setRandom();
|
||||
M cr(m, n);
|
||||
cr.setRandom();
|
||||
M ci(m, n);
|
||||
ci.setRandom();
|
||||
|
||||
BenchTimer t;
|
||||
BENCH(t, tries, rep, matlab_real_cplx(a, br, bi, cr, ci));
|
||||
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n";
|
||||
std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
|
||||
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
}
|
||||
if((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex))
|
||||
{
|
||||
M ar(m,p); ar.setRandom();
|
||||
M ai(m,p); ai.setRandom();
|
||||
M b(p,n); b.setRandom();
|
||||
M cr(m,n); cr.setRandom();
|
||||
M ci(m,n); ci.setRandom();
|
||||
if ((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex)) {
|
||||
M ar(m, p);
|
||||
ar.setRandom();
|
||||
M ai(m, p);
|
||||
ai.setRandom();
|
||||
M b(p, n);
|
||||
b.setRandom();
|
||||
M cr(m, n);
|
||||
cr.setRandom();
|
||||
M ci(m, n);
|
||||
ci.setRandom();
|
||||
|
||||
BenchTimer t;
|
||||
BENCH(t, tries, rep, matlab_cplx_real(ar, ai, b, cr, ci));
|
||||
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n";
|
||||
std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
|
||||
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
|
||||
<< "s)\n";
|
||||
std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t"
|
||||
<< (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
|
||||
<< "s)\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -16,22 +16,19 @@
|
||||
#include <utility>
|
||||
|
||||
template <typename MatrixType>
|
||||
void copy_matrix(MatrixType& m)
|
||||
{
|
||||
void copy_matrix(MatrixType& m) {
|
||||
MatrixType tmp(m);
|
||||
m = tmp;
|
||||
}
|
||||
|
||||
template <typename MatrixType>
|
||||
void move_matrix(MatrixType&& m)
|
||||
{
|
||||
void move_matrix(MatrixType&& m) {
|
||||
MatrixType tmp(std::move(m));
|
||||
m = std::move(tmp);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
void bench(const std::string& label)
|
||||
{
|
||||
void bench(const std::string& label) {
|
||||
using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>, 1, 10>;
|
||||
Eigen::BenchTimer t;
|
||||
|
||||
@ -48,10 +45,8 @@ void bench(const std::string& label)
|
||||
std::cout << label << " move semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
bench<float>("float");
|
||||
bench<double>("double");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -6,45 +6,36 @@ using namespace Eigen;
|
||||
using namespace std;
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v)
|
||||
{
|
||||
EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) {
|
||||
return v.norm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v)
|
||||
{
|
||||
EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) {
|
||||
return v.stableNorm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v)
|
||||
{
|
||||
EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) {
|
||||
return v.hypotNorm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v)
|
||||
{
|
||||
EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) {
|
||||
return v.blueNorm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v)
|
||||
{
|
||||
EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) {
|
||||
typedef typename T::Scalar Scalar;
|
||||
int n = v.size();
|
||||
Scalar scale = 0;
|
||||
Scalar ssq = 1;
|
||||
for (int i=0;i<n;++i)
|
||||
{
|
||||
for (int i = 0; i < n; ++i) {
|
||||
Scalar ax = std::abs(v.coeff(i));
|
||||
if (scale >= ax)
|
||||
{
|
||||
if (scale >= ax) {
|
||||
ssq += numext::abs2(ax / scale);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
ssq = Scalar(1) + ssq * numext::abs2(scale / ax);
|
||||
scale = ax;
|
||||
}
|
||||
@ -53,30 +44,24 @@ EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v)
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v)
|
||||
{
|
||||
EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) {
|
||||
typedef typename T::Scalar Scalar;
|
||||
Scalar s = v.array().abs().maxCoeff();
|
||||
return s * (v / s).norm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v)
|
||||
{
|
||||
EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) {
|
||||
return v.stableNorm();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
|
||||
{
|
||||
EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) {
|
||||
int n = v.size() / 2;
|
||||
for (int i=0;i<n;++i)
|
||||
v(i) = v(2*i)*v(2*i) + v(2*i+1)*v(2*i+1);
|
||||
for (int i = 0; i < n; ++i) v(i) = v(2 * i) * v(2 * i) + v(2 * i + 1) * v(2 * i + 1);
|
||||
n = n / 2;
|
||||
while (n>0)
|
||||
{
|
||||
for (int i=0;i<n;++i)
|
||||
v(i) = v(2*i) + v(2*i+1);
|
||||
while (n > 0) {
|
||||
for (int i = 0; i < n; ++i) v(i) = v(2 * i) + v(2 * i + 1);
|
||||
n = n / 2;
|
||||
}
|
||||
return std::sqrt(v(0));
|
||||
@ -91,12 +76,11 @@ Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }
|
||||
Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a, b); }
|
||||
Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a, b); }
|
||||
#endif
|
||||
}
|
||||
}
|
||||
} // namespace internal
|
||||
} // namespace Eigen
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
{
|
||||
EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) {
|
||||
#ifndef EIGEN_VECTORIZE
|
||||
return v.blueNorm();
|
||||
#else
|
||||
@ -106,22 +90,21 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;
|
||||
int n;
|
||||
|
||||
if(nmax <= 0)
|
||||
{
|
||||
if (nmax <= 0) {
|
||||
int nbig, ibeta, it, iemin, iemax, iexp;
|
||||
Scalar abig, eps;
|
||||
|
||||
nbig = NumTraits<int>::highest(); // largest integer
|
||||
ibeta = std::numeric_limits<Scalar>::radix; // NumTraits<Scalar>::Base; // base for floating-point numbers
|
||||
it = NumTraits<Scalar>::digits(); // NumTraits<Scalar>::Mantissa; // number of base-beta digits in mantissa
|
||||
ibeta = std::numeric_limits<Scalar>::radix; // NumTraits<Scalar>::Base; // base for
|
||||
// floating-point numbers
|
||||
it = NumTraits<Scalar>::digits(); // NumTraits<Scalar>::Mantissa; // number of base-beta digits in
|
||||
// mantissa
|
||||
iemin = NumTraits<Scalar>::min_exponent(); // minimum exponent
|
||||
iemax = NumTraits<Scalar>::max_exponent(); // maximum exponent
|
||||
rbig = NumTraits<Scalar>::highest(); // largest floating-point number
|
||||
|
||||
// Check the basic machine-dependent constants.
|
||||
if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5)
|
||||
|| (it<=4 && ibeta <= 3 ) || it<2)
|
||||
{
|
||||
if (iemin > 1 - 2 * it || 1 + it > iemax || (it == 2 && ibeta < 5) || (it <= 4 && ibeta <= 3) || it < 2) {
|
||||
eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
|
||||
}
|
||||
iexp = -((1 - iemin) / 2);
|
||||
@ -138,8 +121,10 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
eps = std::pow(ibeta, 1 - it);
|
||||
relerr = std::sqrt(eps); // tolerance for neglecting asml
|
||||
abig = 1.0 / eps - 1.0;
|
||||
if (Scalar(nbig)>abig) nmax = abig; // largest safe n
|
||||
else nmax = nbig;
|
||||
if (Scalar(nbig) > abig)
|
||||
nmax = abig; // largest safe n
|
||||
else
|
||||
nmax = nbig;
|
||||
}
|
||||
|
||||
typedef typename internal::packet_traits<Scalar>::type Packet;
|
||||
@ -151,8 +136,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
Packet ps1m = internal::pset1<Packet>(s1m);
|
||||
Packet pb2 = internal::pset1<Packet>(b2);
|
||||
Packet pb1 = internal::pset1<Packet>(b1);
|
||||
for(int j=0; j<v.size(); j+=ps)
|
||||
{
|
||||
for (int j = 0; j < v.size(); j += ps) {
|
||||
Packet ax = internal::pabs(v.template packet<Aligned>(j));
|
||||
Packet ax_s2m = internal::pmul(ax, ps2m);
|
||||
Packet ax_s1m = internal::pmul(ax, ps1m);
|
||||
@ -170,7 +154,6 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
// pasml = internal::padd(pasml, internal::pand(maskSml, ax));
|
||||
// pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
|
||||
|
||||
|
||||
pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m, ax_s2m)));
|
||||
pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m, ax_s1m)));
|
||||
pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax, ax), internal::pand(maskSml, maskBig)));
|
||||
@ -178,39 +161,27 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
Scalar abig = internal::predux(pabig);
|
||||
Scalar asml = internal::predux(pasml);
|
||||
Scalar amed = internal::predux(pamed);
|
||||
if(abig > Scalar(0))
|
||||
{
|
||||
if (abig > Scalar(0)) {
|
||||
abig = std::sqrt(abig);
|
||||
if(abig > overfl)
|
||||
{
|
||||
if (abig > overfl) {
|
||||
eigen_assert(false && "overflow");
|
||||
return rbig;
|
||||
}
|
||||
if(amed > Scalar(0))
|
||||
{
|
||||
if (amed > Scalar(0)) {
|
||||
abig = abig / s2m;
|
||||
amed = std::sqrt(amed);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
return abig / s2m;
|
||||
}
|
||||
|
||||
}
|
||||
else if(asml > Scalar(0))
|
||||
{
|
||||
if (amed > Scalar(0))
|
||||
{
|
||||
} else if (asml > Scalar(0)) {
|
||||
if (amed > Scalar(0)) {
|
||||
abig = std::sqrt(amed);
|
||||
amed = std::sqrt(asml) / s1m;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
return std::sqrt(asml) / s1m;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
return std::sqrt(amed);
|
||||
}
|
||||
asml = std::min(abig, amed);
|
||||
@ -222,17 +193,27 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
#endif
|
||||
}
|
||||
|
||||
#define BENCH_PERF(NRM) { \
|
||||
float af = 0; double ad = 0; std::complex<float> ac = 0; \
|
||||
Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\
|
||||
#define BENCH_PERF(NRM) \
|
||||
{ \
|
||||
float af = 0; \
|
||||
double ad = 0; \
|
||||
std::complex<float> ac = 0; \
|
||||
Eigen::BenchTimer tf, td, tcf; \
|
||||
tf.reset(); \
|
||||
td.reset(); \
|
||||
tcf.reset(); \
|
||||
for (int k = 0; k < tries; ++k) { \
|
||||
tf.start(); \
|
||||
for (int i=0; i<iters; ++i) { af += NRM(vf); } \
|
||||
for (int i = 0; i < iters; ++i) { \
|
||||
af += NRM(vf); \
|
||||
} \
|
||||
tf.stop(); \
|
||||
} \
|
||||
for (int k = 0; k < tries; ++k) { \
|
||||
td.start(); \
|
||||
for (int i=0; i<iters; ++i) { ad += NRM(vd); } \
|
||||
for (int i = 0; i < iters; ++i) { \
|
||||
ad += NRM(vd); \
|
||||
} \
|
||||
td.stop(); \
|
||||
} \
|
||||
/*for (int k=0; k<std::max(1,tries/3); ++k) { \
|
||||
@ -243,8 +224,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
std::cout << #NRM << "\t" << tf.value() << " " << td.value() << " " << tcf.value() << "\n"; \
|
||||
}
|
||||
|
||||
void check_accuracy(double basef, double based, int s)
|
||||
{
|
||||
void check_accuracy(double basef, double based, int s) {
|
||||
double yf = basef * std::abs(internal::random<double>());
|
||||
double yd = based * std::abs(internal::random<double>());
|
||||
VectorXf vf = VectorXf::Ones(s) * yf;
|
||||
@ -260,28 +240,32 @@ void check_accuracy(double basef, double based, int s)
|
||||
std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
|
||||
}
|
||||
|
||||
void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
|
||||
{
|
||||
void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) {
|
||||
VectorXf vf(s);
|
||||
VectorXd vd(s);
|
||||
for (int i=0; i<s; ++i)
|
||||
{
|
||||
for (int i = 0; i < s; ++i) {
|
||||
vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0, ef1));
|
||||
vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0, ed1));
|
||||
}
|
||||
|
||||
// std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
|
||||
std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>()) << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>()) << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>()) << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
|
||||
// std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>())
|
||||
<< "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>())
|
||||
<< "\t" << hypotNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t"
|
||||
<< blueNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>())
|
||||
<< "\t" << blueNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>())
|
||||
<< "\t" << lapackNorm(vd.cast<long double>()) << "\n";
|
||||
std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t"
|
||||
<< twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
|
||||
// std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long
|
||||
// double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
int main(int argc, char** argv) {
|
||||
int tries = 10;
|
||||
int iters = 100000;
|
||||
double y = 1.1345743233455785456788e12 * internal::random<double>();
|
||||
@ -310,15 +294,13 @@ int main(int argc, char** argv)
|
||||
check_accuracy(basef_over, based_over, s);
|
||||
|
||||
std::cerr << "\nVarying (over):\n";
|
||||
for (int k=0; k<1; ++k)
|
||||
{
|
||||
for (int k = 0; k < 1; ++k) {
|
||||
check_accuracy_var(20, 27, 190, 302, s);
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
std::cerr << "\nVarying (under):\n";
|
||||
for (int k=0; k<1; ++k)
|
||||
{
|
||||
for (int k = 0; k < 1; ++k) {
|
||||
check_accuracy_var(-27, 20, -302, -190, s);
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
@ -15,8 +15,7 @@ using namespace Eigen;
|
||||
typedef double Scalar;
|
||||
|
||||
template <typename MatrixType>
|
||||
__attribute__ ((noinline)) void bench_reverse(const MatrixType& m)
|
||||
{
|
||||
__attribute__((noinline)) void bench_reverse(const MatrixType& m) {
|
||||
int rows = m.rows();
|
||||
int cols = m.cols();
|
||||
int size = m.size();
|
||||
@ -30,11 +29,9 @@ __attribute__ ((noinline)) void bench_reverse(const MatrixType& m)
|
||||
Scalar acc = 0;
|
||||
int r = internal::random<int>(0, rows - 1);
|
||||
int c = internal::random<int>(0, cols - 1);
|
||||
for (int t=0; t<TRIES; ++t)
|
||||
{
|
||||
for (int t = 0; t < TRIES; ++t) {
|
||||
timerB.start();
|
||||
for (int k=0; k<repeats; ++k)
|
||||
{
|
||||
for (int k = 0; k < repeats; ++k) {
|
||||
asm("#begin foo");
|
||||
b = a.reverse();
|
||||
asm("#end foo");
|
||||
@ -47,26 +44,22 @@ __attribute__ ((noinline)) void bench_reverse(const MatrixType& m)
|
||||
std::cout << "dyn ";
|
||||
else
|
||||
std::cout << "fixed ";
|
||||
std::cout << rows << " x " << cols << " \t"
|
||||
<< (timerB.value() * REPEAT) / repeats << "s "
|
||||
std::cout << rows << " x " << cols << " \t" << (timerB.value() * REPEAT) / repeats << "s "
|
||||
<< "(" << 1e-6 * size * repeats / timerB.value() << " MFLOPS)\t";
|
||||
|
||||
std::cout << "\n";
|
||||
// make sure the compiler does not optimize too much
|
||||
if (acc==123)
|
||||
std::cout << acc;
|
||||
if (acc == 123) std::cout << acc;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int main(int argc, char* argv[]) {
|
||||
const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 0};
|
||||
std::cout << "size no sqrt standard";
|
||||
// #ifdef BENCH_GSL
|
||||
// std::cout << " GSL (standard + double + ATLAS) ";
|
||||
// #endif
|
||||
std::cout << "\n";
|
||||
for (uint i=0; dynsizes[i]>0; ++i)
|
||||
{
|
||||
for (uint i = 0; dynsizes[i] > 0; ++i) {
|
||||
bench_reverse(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
|
||||
bench_reverse(Matrix<Scalar, Dynamic, 1>(dynsizes[i] * dynsizes[i]));
|
||||
}
|
||||
@ -81,4 +74,3 @@ int main(int argc, char* argv[])
|
||||
// bench_reverse(Matrix<Scalar,16,16>());
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3,15 +3,13 @@
|
||||
using namespace Eigen;
|
||||
using namespace std;
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
typedef Matrix<SCALAR, Eigen::Dynamic, 1> Vec;
|
||||
Vec v(SIZE);
|
||||
v.setZero();
|
||||
v[0] = 1;
|
||||
v[1] = 2;
|
||||
for(int i = 0; i < 1000000; i++)
|
||||
{
|
||||
for (int i = 0; i < 1000000; i++) {
|
||||
v.coeffRef(0) += v.sum() * SCALAR(1e-20);
|
||||
}
|
||||
cout << v.sum() << endl;
|
||||
|
@ -59,14 +59,12 @@ static_assert(maxsize > minsize, "maxsize must be larger than minsize");
|
||||
static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)");
|
||||
|
||||
// just a helper to store a triple of K,M,N sizes for matrix product
|
||||
struct size_triple_t
|
||||
{
|
||||
struct size_triple_t {
|
||||
size_t k, m, n;
|
||||
size_triple_t() : k(0), m(0), n(0) {}
|
||||
size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
|
||||
size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}
|
||||
size_triple_t(uint16_t compact)
|
||||
{
|
||||
size_triple_t(uint16_t compact) {
|
||||
k = 1 << ((compact & 0xf00) >> 8);
|
||||
m = 1 << ((compact & 0x0f0) >> 4);
|
||||
n = 1 << ((compact & 0x00f) >> 0);
|
||||
@ -82,50 +80,35 @@ uint8_t log2_pot(size_t x) {
|
||||
// Convert between size triples and a compact form fitting in 12 bits
|
||||
// where each size, which must be a POT, is encoded as its log2, on 4 bits
|
||||
// so the largest representable size is 2^15 == 32k ... big enough.
|
||||
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
|
||||
{
|
||||
uint16_t compact_size_triple(size_t k, size_t m, size_t n) {
|
||||
return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
|
||||
}
|
||||
|
||||
uint16_t compact_size_triple(const size_triple_t& t)
|
||||
{
|
||||
return compact_size_triple(t.k, t.m, t.n);
|
||||
}
|
||||
uint16_t compact_size_triple(const size_triple_t& t) { return compact_size_triple(t.k, t.m, t.n); }
|
||||
|
||||
// A single benchmark. Initially only contains benchmark params.
|
||||
// Then call run(), which stores the result in the gflops field.
|
||||
struct benchmark_t
|
||||
{
|
||||
struct benchmark_t {
|
||||
uint16_t compact_product_size;
|
||||
uint16_t compact_block_size;
|
||||
bool use_default_block_size;
|
||||
float gflops;
|
||||
benchmark_t()
|
||||
: compact_product_size(0)
|
||||
, compact_block_size(0)
|
||||
, use_default_block_size(false)
|
||||
, gflops(0)
|
||||
{
|
||||
}
|
||||
benchmark_t(size_t pk, size_t pm, size_t pn,
|
||||
size_t bk, size_t bm, size_t bn)
|
||||
: compact_product_size(compact_size_triple(pk, pm, pn))
|
||||
, compact_block_size(compact_size_triple(bk, bm, bn))
|
||||
, use_default_block_size(false)
|
||||
, gflops(0)
|
||||
{}
|
||||
benchmark_t() : compact_product_size(0), compact_block_size(0), use_default_block_size(false), gflops(0) {}
|
||||
benchmark_t(size_t pk, size_t pm, size_t pn, size_t bk, size_t bm, size_t bn)
|
||||
: compact_product_size(compact_size_triple(pk, pm, pn)),
|
||||
compact_block_size(compact_size_triple(bk, bm, bn)),
|
||||
use_default_block_size(false),
|
||||
gflops(0) {}
|
||||
benchmark_t(size_t pk, size_t pm, size_t pn)
|
||||
: compact_product_size(compact_size_triple(pk, pm, pn))
|
||||
, compact_block_size(0)
|
||||
, use_default_block_size(true)
|
||||
, gflops(0)
|
||||
{}
|
||||
: compact_product_size(compact_size_triple(pk, pm, pn)),
|
||||
compact_block_size(0),
|
||||
use_default_block_size(true),
|
||||
gflops(0) {}
|
||||
|
||||
void run();
|
||||
};
|
||||
|
||||
ostream& operator<<(ostream& s, const benchmark_t& b)
|
||||
{
|
||||
ostream& operator<<(ostream& s, const benchmark_t& b) {
|
||||
s << hex << b.compact_product_size << dec;
|
||||
if (b.use_default_block_size) {
|
||||
size_triple_t t(b.compact_product_size);
|
||||
@ -141,17 +124,14 @@ ostream& operator<<(ostream& s, const benchmark_t& b)
|
||||
|
||||
// We sort first by increasing benchmark parameters,
|
||||
// then by decreasing performance.
|
||||
bool operator<(const benchmark_t& b1, const benchmark_t& b2)
|
||||
{
|
||||
bool operator<(const benchmark_t& b1, const benchmark_t& b2) {
|
||||
return b1.compact_product_size < b2.compact_product_size ||
|
||||
(b1.compact_product_size == b2.compact_product_size && (
|
||||
(b1.compact_block_size < b2.compact_block_size || (
|
||||
b1.compact_block_size == b2.compact_block_size &&
|
||||
b1.gflops > b2.gflops))));
|
||||
(b1.compact_product_size == b2.compact_product_size &&
|
||||
((b1.compact_block_size < b2.compact_block_size ||
|
||||
(b1.compact_block_size == b2.compact_block_size && b1.gflops > b2.gflops))));
|
||||
}
|
||||
|
||||
void benchmark_t::run()
|
||||
{
|
||||
void benchmark_t::run() {
|
||||
size_triple_t productsizes(compact_product_size);
|
||||
|
||||
if (use_default_block_size) {
|
||||
@ -169,20 +149,16 @@ void benchmark_t::run()
|
||||
|
||||
const size_t combined_three_matrices_sizes =
|
||||
sizeof(Scalar) *
|
||||
(productsizes.k * productsizes.m +
|
||||
productsizes.k * productsizes.n +
|
||||
productsizes.m * productsizes.n);
|
||||
(productsizes.k * productsizes.m + productsizes.k * productsizes.n + productsizes.m * productsizes.n);
|
||||
|
||||
// 64 M is large enough that nobody has a cache bigger than that,
|
||||
// while still being small enough that everybody has this much RAM,
|
||||
// so conveniently we don't need to special-case platforms here.
|
||||
const size_t unlikely_large_cache_size = 64 << 20;
|
||||
|
||||
const size_t working_set_size =
|
||||
min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
|
||||
const size_t working_set_size = min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
|
||||
|
||||
const size_t matrix_pool_size =
|
||||
1 + working_set_size / combined_three_matrices_sizes;
|
||||
const size_t matrix_pool_size = 1 + working_set_size / combined_three_matrices_sizes;
|
||||
|
||||
MatrixType* lhs = new MatrixType[matrix_pool_size];
|
||||
MatrixType* rhs = new MatrixType[matrix_pool_size];
|
||||
@ -200,7 +176,6 @@ void benchmark_t::run()
|
||||
float time_per_iter = 0.0f;
|
||||
size_t matrix_index = 0;
|
||||
while (true) {
|
||||
|
||||
double starttime = timer.getCpuTime();
|
||||
for (int i = 0; i < iters_at_a_time; i++) {
|
||||
dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
|
||||
@ -228,8 +203,7 @@ void benchmark_t::run()
|
||||
gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter;
|
||||
}
|
||||
|
||||
void print_cpuinfo()
|
||||
{
|
||||
void print_cpuinfo() {
|
||||
#ifdef __linux__
|
||||
cout << "contents of /proc/cpuinfo:" << endl;
|
||||
string line;
|
||||
@ -249,33 +223,30 @@ void print_cpuinfo()
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
string type_name()
|
||||
{
|
||||
string type_name() {
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
template <>
|
||||
string type_name<float>()
|
||||
{
|
||||
string type_name<float>() {
|
||||
return "float";
|
||||
}
|
||||
|
||||
template <>
|
||||
string type_name<double>()
|
||||
{
|
||||
string type_name<double>() {
|
||||
return "double";
|
||||
}
|
||||
|
||||
struct action_t
|
||||
{
|
||||
virtual const char* invokation_name() const { abort(); return nullptr; }
|
||||
struct action_t {
|
||||
virtual const char* invokation_name() const {
|
||||
abort();
|
||||
return nullptr;
|
||||
}
|
||||
virtual void run() const { abort(); }
|
||||
virtual ~action_t() {}
|
||||
};
|
||||
|
||||
void show_usage_and_exit(int /*argc*/, char* argv[],
|
||||
const vector<unique_ptr<action_t>>& available_actions)
|
||||
{
|
||||
void show_usage_and_exit(int /*argc*/, char* argv[], const vector<unique_ptr<action_t>>& available_actions) {
|
||||
cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl;
|
||||
cerr << "available actions:" << endl << endl;
|
||||
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
|
||||
@ -294,8 +265,7 @@ void show_usage_and_exit(int /*argc*/, char* argv[],
|
||||
exit(1);
|
||||
}
|
||||
|
||||
float measure_clock_speed()
|
||||
{
|
||||
float measure_clock_speed() {
|
||||
cerr << "Measuring clock speed... \r" << flush;
|
||||
|
||||
vector<float> all_gflops;
|
||||
@ -315,14 +285,12 @@ float measure_clock_speed()
|
||||
return result;
|
||||
}
|
||||
|
||||
struct human_duration_t
|
||||
{
|
||||
struct human_duration_t {
|
||||
int seconds;
|
||||
human_duration_t(int s) : seconds(s) {}
|
||||
};
|
||||
|
||||
ostream& operator<<(ostream& s, const human_duration_t& d)
|
||||
{
|
||||
ostream& operator<<(ostream& s, const human_duration_t& d) {
|
||||
int remainder = d.seconds;
|
||||
if (remainder > 3600) {
|
||||
int hours = remainder / 3600;
|
||||
@ -342,8 +310,7 @@ ostream& operator<<(ostream& s, const human_duration_t& d)
|
||||
|
||||
const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data";
|
||||
|
||||
void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run)
|
||||
{
|
||||
void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run) {
|
||||
FILE* file = fopen(filename, "w");
|
||||
if (!file) {
|
||||
cerr << "Could not open file " << filename << " for writing." << endl;
|
||||
@ -358,8 +325,7 @@ void serialize_benchmarks(const char* filename, const vector<benchmark_t>& bench
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run)
|
||||
{
|
||||
bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run) {
|
||||
FILE* file = fopen(filename, "r");
|
||||
if (!file) {
|
||||
return false;
|
||||
@ -382,11 +348,7 @@ bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmark
|
||||
return true;
|
||||
}
|
||||
|
||||
void try_run_some_benchmarks(
|
||||
vector<benchmark_t>& benchmarks,
|
||||
double time_start,
|
||||
size_t& first_benchmark_to_run)
|
||||
{
|
||||
void try_run_some_benchmarks(vector<benchmark_t>& benchmarks, double time_start, size_t& first_benchmark_to_run) {
|
||||
if (first_benchmark_to_run == benchmarks.size()) {
|
||||
return;
|
||||
}
|
||||
@ -402,9 +364,7 @@ void try_run_some_benchmarks(
|
||||
time_now = timer.getRealTime();
|
||||
|
||||
// We check clock speed every minute and at the end.
|
||||
if (benchmark_index == benchmarks.size() ||
|
||||
time_now > time_last_clock_speed_measurement + 60.0f)
|
||||
{
|
||||
if (benchmark_index == benchmarks.size() || time_now > time_last_clock_speed_measurement + 60.0f) {
|
||||
time_last_clock_speed_measurement = time_now;
|
||||
|
||||
// Ensure that clock speed is as expected
|
||||
@ -425,8 +385,7 @@ void try_run_some_benchmarks(
|
||||
// which invalidates all benchmark results collected so far.
|
||||
// Either way, we better restart all over again now.
|
||||
if (benchmark_index) {
|
||||
cerr << "Restarting at " << 100.0f * ratio_done
|
||||
<< " % because clock speed increased. " << endl;
|
||||
cerr << "Restarting at " << 100.0f * ratio_done << " % because clock speed increased. " << endl;
|
||||
}
|
||||
max_clock_speed = current_clock_speed;
|
||||
first_benchmark_to_run = 0;
|
||||
@ -436,12 +395,9 @@ void try_run_some_benchmarks(
|
||||
bool rerun_last_tests = false;
|
||||
|
||||
if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
|
||||
cerr << "Measurements completed so far: "
|
||||
<< 100.0f * ratio_done
|
||||
<< " % " << endl;
|
||||
cerr << "Clock speed seems to be only "
|
||||
<< current_clock_speed/max_clock_speed
|
||||
<< " times what it used to be." << endl;
|
||||
cerr << "Measurements completed so far: " << 100.0f * ratio_done << " % " << endl;
|
||||
cerr << "Clock speed seems to be only " << current_clock_speed / max_clock_speed << " times what it used to be."
|
||||
<< endl;
|
||||
|
||||
unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
|
||||
|
||||
@ -454,9 +410,8 @@ void try_run_some_benchmarks(
|
||||
exit(2);
|
||||
}
|
||||
rerun_last_tests = true;
|
||||
cerr << "Sleeping "
|
||||
<< seconds_to_sleep_if_lower_clock_speed
|
||||
<< " s... \r" << endl;
|
||||
cerr << "Sleeping " << seconds_to_sleep_if_lower_clock_speed << " s... \r"
|
||||
<< endl;
|
||||
sleep(seconds_to_sleep_if_lower_clock_speed);
|
||||
current_clock_speed = measure_clock_speed();
|
||||
seconds_to_sleep_if_lower_clock_speed *= 2;
|
||||
@ -464,8 +419,7 @@ void try_run_some_benchmarks(
|
||||
}
|
||||
|
||||
if (rerun_last_tests) {
|
||||
cerr << "Redoing the last "
|
||||
<< 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
|
||||
cerr << "Redoing the last " << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
|
||||
<< " % because clock speed had been low. " << endl;
|
||||
return;
|
||||
}
|
||||
@ -486,8 +440,7 @@ void try_run_some_benchmarks(
|
||||
// Display progress info on stderr
|
||||
if (time_now > time_last_progress_update + 1.0f) {
|
||||
time_last_progress_update = time_now;
|
||||
cerr << "Measurements... " << 100.0f * ratio_done
|
||||
<< " %, ETA "
|
||||
cerr << "Measurements... " << 100.0f * ratio_done << " %, ETA "
|
||||
<< human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done)
|
||||
<< " \r" << flush;
|
||||
}
|
||||
@ -498,19 +451,15 @@ void try_run_some_benchmarks(
|
||||
}
|
||||
}
|
||||
|
||||
void run_benchmarks(vector<benchmark_t>& benchmarks)
|
||||
{
|
||||
void run_benchmarks(vector<benchmark_t>& benchmarks) {
|
||||
size_t first_benchmark_to_run;
|
||||
vector<benchmark_t> deserialized_benchmarks;
|
||||
bool use_deserialized_benchmarks = false;
|
||||
if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) {
|
||||
cerr << "Found serialized session with "
|
||||
<< 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
|
||||
cerr << "Found serialized session with " << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
|
||||
<< " % already done" << endl;
|
||||
if (deserialized_benchmarks.size() == benchmarks.size() &&
|
||||
first_benchmark_to_run > 0 &&
|
||||
first_benchmark_to_run < benchmarks.size())
|
||||
{
|
||||
if (deserialized_benchmarks.size() == benchmarks.size() && first_benchmark_to_run > 0 &&
|
||||
first_benchmark_to_run < benchmarks.size()) {
|
||||
use_deserialized_benchmarks = true;
|
||||
}
|
||||
}
|
||||
@ -537,9 +486,7 @@ void run_benchmarks(vector<benchmark_t>& benchmarks)
|
||||
if (first_benchmark_to_run == 0) {
|
||||
time_start = timer.getRealTime();
|
||||
}
|
||||
try_run_some_benchmarks(benchmarks,
|
||||
time_start,
|
||||
first_benchmark_to_run);
|
||||
try_run_some_benchmarks(benchmarks, time_start, first_benchmark_to_run);
|
||||
}
|
||||
|
||||
// Sort timings by increasing benchmark parameters, and decreasing gflops.
|
||||
@ -550,10 +497,8 @@ void run_benchmarks(vector<benchmark_t>& benchmarks)
|
||||
// Collect best (i.e. now first) results for each parameter values.
|
||||
vector<benchmark_t> best_benchmarks;
|
||||
for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
|
||||
if (best_benchmarks.empty() ||
|
||||
best_benchmarks.back().compact_product_size != it->compact_product_size ||
|
||||
best_benchmarks.back().compact_block_size != it->compact_block_size)
|
||||
{
|
||||
if (best_benchmarks.empty() || best_benchmarks.back().compact_product_size != it->compact_product_size ||
|
||||
best_benchmarks.back().compact_block_size != it->compact_block_size) {
|
||||
best_benchmarks.push_back(*it);
|
||||
}
|
||||
}
|
||||
@ -562,11 +507,9 @@ void run_benchmarks(vector<benchmark_t>& benchmarks)
|
||||
benchmarks = best_benchmarks;
|
||||
}
|
||||
|
||||
struct measure_all_pot_sizes_action_t : action_t
|
||||
{
|
||||
struct measure_all_pot_sizes_action_t : action_t {
|
||||
virtual const char* invokation_name() const { return "all-pot-sizes"; }
|
||||
virtual void run() const
|
||||
{
|
||||
virtual void run() const {
|
||||
vector<benchmark_t> benchmarks;
|
||||
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
|
||||
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
|
||||
@ -593,11 +536,9 @@ struct measure_all_pot_sizes_action_t : action_t
|
||||
}
|
||||
};
|
||||
|
||||
struct measure_default_sizes_action_t : action_t
|
||||
{
|
||||
struct measure_default_sizes_action_t : action_t {
|
||||
virtual const char* invokation_name() const { return "default-sizes"; }
|
||||
virtual void run() const
|
||||
{
|
||||
virtual void run() const {
|
||||
vector<benchmark_t> benchmarks;
|
||||
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
|
||||
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
|
||||
@ -618,8 +559,7 @@ struct measure_default_sizes_action_t : action_t
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int main(int argc, char* argv[]) {
|
||||
double time_start = timer.getRealTime();
|
||||
cout.precision(4);
|
||||
cerr.precision(4);
|
||||
|
@ -19,18 +19,15 @@ using namespace Eigen;
|
||||
#define SCALAR double
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int main(int argc, char *argv[]) {
|
||||
Matrix<SCALAR, MATSIZE, MATSIZE> I = Matrix<SCALAR, MATSIZE, MATSIZE>::Ones();
|
||||
Matrix<SCALAR, MATSIZE, MATSIZE> m;
|
||||
for (int i = 0; i < MATSIZE; i++)
|
||||
for(int j = 0; j < MATSIZE; j++)
|
||||
{
|
||||
for (int j = 0; j < MATSIZE; j++) {
|
||||
m(i, j) = (i + MATSIZE * j);
|
||||
}
|
||||
asm("#begin");
|
||||
for(int a = 0; a < REPEAT; a++)
|
||||
{
|
||||
for (int a = 0; a < REPEAT; a++) {
|
||||
m = Matrix<SCALAR, MATSIZE, MATSIZE>::Ones() + 0.00005 * (m + (m * m));
|
||||
}
|
||||
asm("#end");
|
||||
|
@ -15,14 +15,12 @@ using namespace Eigen;
|
||||
#define SCALAR float
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int main(int argc, char *argv[]) {
|
||||
typedef Matrix<SCALAR, Eigen::Dynamic, Eigen::Dynamic> Mat;
|
||||
Mat m(100, 100);
|
||||
m.setRandom();
|
||||
|
||||
for(int a = 0; a < REPEAT; a++)
|
||||
{
|
||||
for (int a = 0; a < REPEAT; a++) {
|
||||
int r, c, nr, nc;
|
||||
r = Eigen::internal::random<int>(0, 10);
|
||||
c = Eigen::internal::random<int>(0, 10);
|
||||
|
@ -19,16 +19,14 @@ using namespace Eigen;
|
||||
#define REPEAT 100
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int main(int argc, char *argv[]) {
|
||||
MATTYPE I = MATTYPE::Ones(MATSIZE, MATSIZE);
|
||||
MATTYPE m(MATSIZE, MATSIZE);
|
||||
for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
|
||||
{
|
||||
for (int i = 0; i < MATSIZE; i++)
|
||||
for (int j = 0; j < MATSIZE; j++) {
|
||||
m(i, j) = (i + j + 1) / (MATSIZE * MATSIZE);
|
||||
}
|
||||
for(int a = 0; a < REPEAT; a++)
|
||||
{
|
||||
for (int a = 0; a < REPEAT; a++) {
|
||||
m = I + 0.0001 * (m + m * m);
|
||||
}
|
||||
cout << m(0, 0) << endl;
|
||||
|
@ -18,16 +18,13 @@ using namespace Eigen;
|
||||
#define REPEAT 1000
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int main(int argc, char *argv[]) {
|
||||
VECTYPE I = VECTYPE::Ones(VECSIZE);
|
||||
VECTYPE m(VECSIZE, 1);
|
||||
for(int i = 0; i < VECSIZE; i++)
|
||||
{
|
||||
for (int i = 0; i < VECSIZE; i++) {
|
||||
m[i] = 0.1 * i / VECSIZE;
|
||||
}
|
||||
for(int a = 0; a < REPEAT; a++)
|
||||
{
|
||||
for (int a = 0; a < REPEAT; a++) {
|
||||
m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m / 4);
|
||||
}
|
||||
cout << m[0] << endl;
|
||||
|
@ -30,13 +30,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_aat_product {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_aat_product( int size ):_size(size)
|
||||
{
|
||||
Action_aat_product(int size) : _size(size) {
|
||||
MESSAGE("Action_aat_product Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
@ -52,13 +49,11 @@ public :
|
||||
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_aat_product( const Action_aat_product & )
|
||||
{
|
||||
Action_aat_product(const Action_aat_product&) {
|
||||
INFOS("illegal call to Action_aat_product Copy Ctor");
|
||||
exit(0);
|
||||
}
|
||||
@ -66,7 +61,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_aat_product(void) {
|
||||
|
||||
MESSAGE("Action_aat_product Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -76,32 +70,20 @@ public :
|
||||
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "aat_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "aat_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return double(_size)*double(_size)*double(_size);
|
||||
}
|
||||
double nb_op_base(void) { return double(_size) * double(_size) * double(_size); }
|
||||
|
||||
inline void initialize(void) {
|
||||
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_matrix(X_ref, X, _size);
|
||||
|
||||
}
|
||||
|
||||
inline void calculate( void ) {
|
||||
|
||||
Interface::aat_product(A,X,_size);
|
||||
|
||||
}
|
||||
inline void calculate(void) { Interface::aat_product(A, X, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
if (_size > 128) return;
|
||||
@ -111,18 +93,15 @@ public :
|
||||
|
||||
STL_interface<typename Interface::real_type>::aat_product(A_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
@ -133,13 +112,7 @@ private :
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_matrix X;
|
||||
|
||||
|
||||
int _size;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -30,13 +30,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_ata_product {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_ata_product( int size ):_size(size)
|
||||
{
|
||||
Action_ata_product(int size) : _size(size) {
|
||||
MESSAGE("Action_ata_product Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
@ -52,13 +49,11 @@ public :
|
||||
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_ata_product( const Action_ata_product & )
|
||||
{
|
||||
Action_ata_product(const Action_ata_product&) {
|
||||
INFOS("illegal call to Action_ata_product Copy Ctor");
|
||||
exit(0);
|
||||
}
|
||||
@ -66,7 +61,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_ata_product(void) {
|
||||
|
||||
MESSAGE("Action_ata_product Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -76,32 +70,20 @@ public :
|
||||
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "ata_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "ata_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return 2.0*_size*_size*_size;
|
||||
}
|
||||
double nb_op_base(void) { return 2.0 * _size * _size * _size; }
|
||||
|
||||
inline void initialize(void) {
|
||||
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_matrix(X_ref, X, _size);
|
||||
|
||||
}
|
||||
|
||||
inline void calculate( void ) {
|
||||
|
||||
Interface::ata_product(A,X,_size);
|
||||
|
||||
}
|
||||
inline void calculate(void) { Interface::ata_product(A, X, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
if (_size > 128) return;
|
||||
@ -111,18 +93,15 @@ public :
|
||||
|
||||
STL_interface<typename Interface::real_type>::ata_product(A_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
@ -133,13 +112,7 @@ private :
|
||||
typename Interface::gene_matrix A;
|
||||
typename Interface::gene_matrix X;
|
||||
|
||||
|
||||
int _size;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -30,11 +30,8 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_atv_product {
|
||||
|
||||
public:
|
||||
|
||||
Action_atv_product( int size ) : _size(size)
|
||||
{
|
||||
Action_atv_product(int size) : _size(size) {
|
||||
MESSAGE("Action_atv_product Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
@ -56,14 +53,12 @@ public :
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
Action_atv_product( const Action_atv_product & )
|
||||
{
|
||||
Action_atv_product(const Action_atv_product&) {
|
||||
INFOS("illegal call to Action_atv_product Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
~Action_atv_product( void )
|
||||
{
|
||||
~Action_atv_product(void) {
|
||||
MESSAGE("Action_atv_product Dtor");
|
||||
|
||||
Interface::free_matrix(A, _size);
|
||||
@ -91,15 +86,13 @@ public :
|
||||
BTL_ASM_COMMENT("end atv");
|
||||
}
|
||||
|
||||
void check_result( void )
|
||||
{
|
||||
void check_result(void) {
|
||||
if (_size > 128) return;
|
||||
Interface::vector_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::atv_product(A_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
@ -108,7 +101,6 @@ public :
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
@ -122,13 +114,7 @@ private :
|
||||
typename Interface::gene_vector B;
|
||||
typename Interface::gene_vector X;
|
||||
|
||||
|
||||
int _size;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -29,12 +29,9 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_axpby {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
Action_axpby( int size ):_alpha(0.5),_beta(0.95),_size(size)
|
||||
{
|
||||
Action_axpby(int size) : _alpha(0.5), _beta(0.95), _size(size) {
|
||||
MESSAGE("Action_axpby Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
@ -51,8 +48,7 @@ public :
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
Action_axpby( const Action_axpby & )
|
||||
{
|
||||
Action_axpby(const Action_axpby&) {
|
||||
INFOS("illegal call to Action_axpby Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -70,14 +66,9 @@ public :
|
||||
}
|
||||
|
||||
// action name
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "axpby_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "axpby_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return 3.0*_size;
|
||||
}
|
||||
double nb_op_base(void) { return 3.0 * _size; }
|
||||
|
||||
inline void initialize(void) {
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
@ -97,8 +88,7 @@ public :
|
||||
|
||||
STL_interface<typename Interface::real_type>::axpby(_alpha, X_stl, _beta, Y_stl, _size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Y_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
@ -107,7 +97,6 @@ public :
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector Y_stl;
|
||||
typename Interface::stl_vector resu_stl;
|
||||
|
@ -30,13 +30,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_axpy {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_axpy( int size ):_coef(1.0),_size(size)
|
||||
{
|
||||
Action_axpy(int size) : _coef(1.0), _size(size) {
|
||||
MESSAGE("Action_axpy Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
@ -52,14 +49,11 @@ public :
|
||||
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
Interface::vector_from_stl(Y, Y_stl);
|
||||
|
||||
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_axpy( const Action_axpy & )
|
||||
{
|
||||
Action_axpy(const Action_axpy&) {
|
||||
INFOS("illegal call to Action_axpy Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -67,7 +61,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_axpy(void) {
|
||||
|
||||
MESSAGE("Action_axpy Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -81,14 +74,9 @@ public :
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "axpy_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "axpy_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return 2.0*_size;
|
||||
}
|
||||
double nb_op_base(void) { return 2.0 * _size; }
|
||||
|
||||
inline void initialize(void) {
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
@ -109,18 +97,15 @@ public :
|
||||
|
||||
STL_interface<typename Interface::real_type>::axpy(_coef, X_stl, Y_stl, _size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Y_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector Y_stl;
|
||||
typename Interface::stl_vector resu_stl;
|
||||
|
@ -29,13 +29,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_cholesky {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_cholesky( int size ):_size(size)
|
||||
{
|
||||
Action_cholesky(int size) : _size(size) {
|
||||
MESSAGE("Action_cholesky Ctor");
|
||||
|
||||
// STL mat/vec initialization
|
||||
@ -43,8 +40,7 @@ public :
|
||||
init_matrix<null_function>(C_stl, _size);
|
||||
|
||||
// make sure X is invertible
|
||||
for (int i=0; i<_size; ++i)
|
||||
X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100;
|
||||
for (int i = 0; i < _size; ++i) X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100;
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
@ -52,8 +48,7 @@ public :
|
||||
Interface::matrix_from_stl(C, C_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j=0; j<_size; ++j)
|
||||
{
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
double r = std::max(_size - j - 1, 0);
|
||||
_cost += 2 * (r * j + r + j);
|
||||
}
|
||||
@ -61,8 +56,7 @@ public :
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_cholesky( const Action_cholesky & )
|
||||
{
|
||||
Action_cholesky(const Action_cholesky&) {
|
||||
INFOS("illegal call to Action_cholesky Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -70,7 +64,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_cholesky(void) {
|
||||
|
||||
MESSAGE("Action_cholesky Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -81,22 +74,13 @@ public :
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "cholesky_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "cholesky_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return _cost;
|
||||
}
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize( void ){
|
||||
Interface::copy_matrix(X_ref,X,_size);
|
||||
}
|
||||
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::cholesky(X,C,_size);
|
||||
}
|
||||
inline void calculate(void) { Interface::cholesky(X, C, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
@ -109,11 +93,9 @@ public :
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
|
||||
|
@ -25,12 +25,9 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_ger {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
BTL_DONT_INLINE Action_ger( int size ):_size(size)
|
||||
{
|
||||
BTL_DONT_INLINE Action_ger(int size) : _size(size) {
|
||||
MESSAGE("Action_ger Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
@ -50,8 +47,7 @@ public :
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
Action_ger( const Action_ger & )
|
||||
{
|
||||
Action_ger(const Action_ger&) {
|
||||
INFOS("illegal call to Action_ger Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -65,18 +61,12 @@ public :
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_vector(B_ref);
|
||||
Interface::free_vector(X_ref);
|
||||
|
||||
}
|
||||
|
||||
// action name
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "ger_" + Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "ger_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return 2.0*_size*_size;
|
||||
}
|
||||
double nb_op_base(void) { return 2.0 * _size * _size; }
|
||||
|
||||
BTL_DONT_INLINE void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
@ -96,18 +86,15 @@ public :
|
||||
|
||||
STL_interface<typename Interface::real_type>::ger(A_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-3) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
@ -124,5 +111,4 @@ private :
|
||||
int _size;
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -29,13 +29,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_hessenberg {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_hessenberg( int size ):_size(size)
|
||||
{
|
||||
Action_hessenberg(int size) : _size(size) {
|
||||
MESSAGE("Action_hessenberg Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
@ -50,8 +47,7 @@ public :
|
||||
Interface::matrix_from_stl(C, C_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j=0; j<_size-2; ++j)
|
||||
{
|
||||
for (int j = 0; j < _size - 2; ++j) {
|
||||
double r = std::max(0, _size - j - 1);
|
||||
double b = std::max(0, _size - j - 2);
|
||||
_cost += 6 + 3 * b + r * r * 4 + r * _size * 4;
|
||||
@ -60,8 +56,7 @@ public :
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_hessenberg( const Action_hessenberg & )
|
||||
{
|
||||
Action_hessenberg(const Action_hessenberg&) {
|
||||
INFOS("illegal call to Action_hessenberg Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -69,7 +64,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_hessenberg(void) {
|
||||
|
||||
MESSAGE("Action_hessenberg Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -80,22 +74,13 @@ public :
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "hessenberg_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "hessenberg_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return _cost;
|
||||
}
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize( void ){
|
||||
Interface::copy_matrix(X_ref,X,_size);
|
||||
}
|
||||
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::hessenberg(X,C,_size);
|
||||
}
|
||||
inline void calculate(void) { Interface::hessenberg(X, C, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
@ -110,11 +95,9 @@ public :
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
@ -129,22 +112,17 @@ private :
|
||||
|
||||
template <class Interface>
|
||||
class Action_tridiagonalization {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_tridiagonalization( int size ):_size(size)
|
||||
{
|
||||
Action_tridiagonalization(int size) : _size(size) {
|
||||
MESSAGE("Action_tridiagonalization Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
init_matrix<pseudo_random>(X_stl, _size);
|
||||
|
||||
for(int i=0; i<_size; ++i)
|
||||
{
|
||||
for(int j=0; j<i; ++j)
|
||||
X_stl[i][j] = X_stl[j][i];
|
||||
for (int i = 0; i < _size; ++i) {
|
||||
for (int j = 0; j < i; ++j) X_stl[i][j] = X_stl[j][i];
|
||||
}
|
||||
|
||||
init_matrix<null_function>(C_stl, _size);
|
||||
@ -156,8 +134,7 @@ public :
|
||||
Interface::matrix_from_stl(C, C_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j=0; j<_size-2; ++j)
|
||||
{
|
||||
for (int j = 0; j < _size - 2; ++j) {
|
||||
double r = std::max(0, _size - j - 1);
|
||||
double b = std::max(0, _size - j - 2);
|
||||
_cost += 6. + 3. * b + r * r * 8.;
|
||||
@ -166,8 +143,7 @@ public :
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_tridiagonalization( const Action_tridiagonalization & )
|
||||
{
|
||||
Action_tridiagonalization(const Action_tridiagonalization&) {
|
||||
INFOS("illegal call to Action_tridiagonalization Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -175,7 +151,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_tridiagonalization(void) {
|
||||
|
||||
MESSAGE("Action_tridiagonalization Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -188,17 +163,11 @@ public :
|
||||
|
||||
static inline std::string name(void) { return "tridiagonalization_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return _cost;
|
||||
}
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize( void ){
|
||||
Interface::copy_matrix(X_ref,X,_size);
|
||||
}
|
||||
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::tridiagonalization(X,C,_size);
|
||||
}
|
||||
inline void calculate(void) { Interface::tridiagonalization(X, C, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
@ -213,11 +182,9 @@ public :
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
@ -29,13 +29,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_lu_decomp {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_lu_decomp( int size ):_size(size)
|
||||
{
|
||||
Action_lu_decomp(int size) : _size(size) {
|
||||
MESSAGE("Action_lu_decomp Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
@ -54,8 +51,7 @@ public :
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_lu_decomp( const Action_lu_decomp & )
|
||||
{
|
||||
Action_lu_decomp(const Action_lu_decomp&) {
|
||||
INFOS("illegal call to Action_lu_decomp Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -63,7 +59,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_lu_decomp(void) {
|
||||
|
||||
MESSAGE("Action_lu_decomp Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -74,22 +69,13 @@ public :
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "complete_lu_decomp_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "complete_lu_decomp_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return _cost;
|
||||
}
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize( void ){
|
||||
Interface::copy_matrix(X_ref,X,_size);
|
||||
}
|
||||
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::lu_decomp(X,C,_size);
|
||||
}
|
||||
inline void calculate(void) { Interface::lu_decomp(X, C, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
@ -104,11 +90,9 @@ public :
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
@ -29,23 +29,15 @@
|
||||
using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_lu_solve
|
||||
{
|
||||
|
||||
class Action_lu_solve {
|
||||
public:
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "lu_solve_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "lu_solve_" + Interface::name(); }
|
||||
|
||||
static double nb_op_base(int size) {
|
||||
return 2.0 * size * size * size / 3.0; // questionable but not really important
|
||||
}
|
||||
|
||||
|
||||
static double calculate(int nb_calc, int size) {
|
||||
|
||||
// STL matrix and vector initialization
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
@ -83,7 +75,6 @@ public :
|
||||
chronos.start();
|
||||
|
||||
for (int ii = 0; ii < nb_calc; ii++) {
|
||||
|
||||
// LU factorization
|
||||
Interface::copy_matrix(A, LU, size);
|
||||
Interface::LU_factor(LU, pivot, size);
|
||||
@ -91,7 +82,6 @@ public :
|
||||
// LU solve
|
||||
|
||||
Interface::LU_solve(LU, pivot, B, X, size);
|
||||
|
||||
}
|
||||
|
||||
// Time stop
|
||||
@ -107,8 +97,7 @@ public :
|
||||
|
||||
STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl, X_stl, B_new_stl, size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(B_stl,B_new_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(B_stl, B_new_stl);
|
||||
|
||||
if (error > 1.e-5) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
@ -126,11 +115,6 @@ public :
|
||||
|
||||
return time;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -30,13 +30,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_matrix_matrix_product {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_matrix_matrix_product( int size ):_size(size)
|
||||
{
|
||||
Action_matrix_matrix_product(int size) : _size(size) {
|
||||
MESSAGE("Action_matrix_matrix_product Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
@ -55,13 +52,11 @@ public :
|
||||
Interface::matrix_from_stl(A, A_stl);
|
||||
Interface::matrix_from_stl(B, B_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_matrix_matrix_product( const Action_matrix_matrix_product & )
|
||||
{
|
||||
Action_matrix_matrix_product(const Action_matrix_matrix_product&) {
|
||||
INFOS("illegal call to Action_matrix_matrix_product Copy Ctor");
|
||||
exit(0);
|
||||
}
|
||||
@ -69,7 +64,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_matrix_matrix_product(void) {
|
||||
|
||||
MESSAGE("Action_matrix_matrix_product Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -81,41 +75,28 @@ public :
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_matrix(B_ref, _size);
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "matrix_matrix_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return 2.0*_size*_size*_size;
|
||||
}
|
||||
double nb_op_base(void) { return 2.0 * _size * _size * _size; }
|
||||
|
||||
inline void initialize(void) {
|
||||
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_matrix(B_ref, B, _size);
|
||||
Interface::copy_matrix(X_ref, X, _size);
|
||||
|
||||
}
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::matrix_matrix_product(A,B,X,_size);
|
||||
}
|
||||
inline void calculate(void) { Interface::matrix_matrix_product(A, B, X, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
|
||||
// calculation check
|
||||
if (_size<200)
|
||||
{
|
||||
if (_size < 200) {
|
||||
Interface::matrix_to_stl(X, resu_stl);
|
||||
STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl, B_stl, X_stl, _size);
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(1);
|
||||
@ -124,7 +105,6 @@ public :
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix B_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
@ -138,13 +118,7 @@ private :
|
||||
typename Interface::gene_matrix B;
|
||||
typename Interface::gene_matrix X;
|
||||
|
||||
|
||||
int _size;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -31,20 +31,12 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_matrix_matrix_product_bis {
|
||||
|
||||
public:
|
||||
static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); }
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "matrix_matrix_"+Interface::name();
|
||||
}
|
||||
|
||||
static double nb_op_base(int size){
|
||||
return 2.0*size*size*size;
|
||||
}
|
||||
static double nb_op_base(int size) { return 2.0 * size * size * size; }
|
||||
|
||||
static double calculate(int nb_calc, int size) {
|
||||
|
||||
// STL matrix and vector initialization
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
@ -65,7 +57,6 @@ public :
|
||||
typename Interface::gene_matrix B;
|
||||
typename Interface::gene_matrix X;
|
||||
|
||||
|
||||
Interface::matrix_from_stl(A_ref, A_stl);
|
||||
Interface::matrix_from_stl(B_ref, B_stl);
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
@ -74,7 +65,6 @@ public :
|
||||
Interface::matrix_from_stl(B, B_stl);
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
|
||||
|
||||
// STL_timer utilities
|
||||
|
||||
STL_timer chronos;
|
||||
@ -84,15 +74,12 @@ public :
|
||||
chronos.start_baseline(nb_calc);
|
||||
|
||||
do {
|
||||
|
||||
Interface::copy_matrix(A_ref, A, size);
|
||||
Interface::copy_matrix(B_ref, B, size);
|
||||
Interface::copy_matrix(X_ref, X, size);
|
||||
|
||||
|
||||
// Interface::matrix_matrix_product(A,B,X,size); This line must be commented !!!!
|
||||
}
|
||||
while(chronos.check());
|
||||
} while (chronos.check());
|
||||
|
||||
chronos.report(true);
|
||||
|
||||
@ -101,14 +88,12 @@ public :
|
||||
chronos.start(nb_calc);
|
||||
|
||||
do {
|
||||
|
||||
Interface::copy_matrix(A_ref, A, size);
|
||||
Interface::copy_matrix(B_ref, B, size);
|
||||
Interface::copy_matrix(X_ref, X, size);
|
||||
|
||||
Interface::matrix_matrix_product(A, B, X, size); // here it is not commented !!!!
|
||||
}
|
||||
while(chronos.check());
|
||||
} while (chronos.check());
|
||||
|
||||
chronos.report(true);
|
||||
|
||||
@ -122,8 +107,7 @@ public :
|
||||
|
||||
STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl, B_stl, X_stl, size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-6) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
@ -142,11 +126,6 @@ public :
|
||||
|
||||
return time;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -30,13 +30,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_matrix_vector_product {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
BTL_DONT_INLINE Action_matrix_vector_product( int size ):_size(size)
|
||||
{
|
||||
BTL_DONT_INLINE Action_matrix_vector_product(int size) : _size(size) {
|
||||
MESSAGE("Action_matrix_vector_product Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
@ -54,13 +51,11 @@ public :
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
Interface::vector_from_stl(X_ref, X_stl);
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_matrix_vector_product( const Action_matrix_vector_product & )
|
||||
{
|
||||
Action_matrix_vector_product(const Action_matrix_vector_product&) {
|
||||
INFOS("illegal call to Action_matrix_vector_product Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -68,7 +63,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
BTL_DONT_INLINE ~Action_matrix_vector_product(void) {
|
||||
|
||||
MESSAGE("Action_matrix_vector_product Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -80,26 +74,18 @@ public :
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_vector(B_ref);
|
||||
Interface::free_vector(X_ref);
|
||||
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "matrix_vector_" + Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "matrix_vector_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return 2.0*_size*_size;
|
||||
}
|
||||
double nb_op_base(void) { return 2.0 * _size * _size; }
|
||||
|
||||
BTL_DONT_INLINE void initialize(void) {
|
||||
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_vector(B_ref, B, _size);
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void calculate(void) {
|
||||
@ -109,25 +95,21 @@ public :
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void check_result(void) {
|
||||
|
||||
// calculation check
|
||||
|
||||
Interface::vector_to_stl(X, resu_stl);
|
||||
|
||||
STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-5) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
@ -141,13 +123,7 @@ private :
|
||||
typename Interface::gene_vector B;
|
||||
typename Interface::gene_vector X;
|
||||
|
||||
|
||||
int _size;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -29,13 +29,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_partial_lu {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_partial_lu( int size ):_size(size)
|
||||
{
|
||||
Action_partial_lu(int size) : _size(size) {
|
||||
MESSAGE("Action_partial_lu Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
@ -43,8 +40,7 @@ public :
|
||||
init_matrix<null_function>(C_stl, _size);
|
||||
|
||||
// make sure X is invertible
|
||||
for (int i=0; i<_size; ++i)
|
||||
X_stl[i][i] = X_stl[i][i] * 1e2 + 1;
|
||||
for (int i = 0; i < _size; ++i) X_stl[i][i] = X_stl[i][i] * 1e2 + 1;
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(X_ref, X_stl);
|
||||
@ -56,8 +52,7 @@ public :
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_partial_lu( const Action_partial_lu & )
|
||||
{
|
||||
Action_partial_lu(const Action_partial_lu&) {
|
||||
INFOS("illegal call to Action_partial_lu Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -65,7 +60,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_partial_lu(void) {
|
||||
|
||||
MESSAGE("Action_partial_lu Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -76,22 +70,13 @@ public :
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "partial_lu_decomp_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "partial_lu_decomp_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return _cost;
|
||||
}
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize( void ){
|
||||
Interface::copy_matrix(X_ref,X,_size);
|
||||
}
|
||||
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::partial_lu_decomp(X,C,_size);
|
||||
}
|
||||
inline void calculate(void) { Interface::partial_lu_decomp(X, C, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
// calculation check
|
||||
@ -106,11 +91,9 @@ public :
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
|
||||
|
@ -25,12 +25,9 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_rot {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
BTL_DONT_INLINE Action_rot( int size ):_size(size)
|
||||
{
|
||||
BTL_DONT_INLINE Action_rot(int size) : _size(size) {
|
||||
MESSAGE("Action_rot Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
@ -46,8 +43,7 @@ public :
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
Action_rot( const Action_rot & )
|
||||
{
|
||||
Action_rot(const Action_rot&) {
|
||||
INFOS("illegal call to Action_rot Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -62,14 +58,9 @@ public :
|
||||
}
|
||||
|
||||
// action name
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "rot_" + Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "rot_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return 6.0*_size;
|
||||
}
|
||||
double nb_op_base(void) { return 6.0 * _size; }
|
||||
|
||||
BTL_DONT_INLINE void initialize(void) {
|
||||
Interface::copy_vector(A_ref, A, _size);
|
||||
@ -95,11 +86,9 @@ public :
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_vector A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
|
||||
@ -112,5 +101,4 @@ private :
|
||||
int _size;
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -30,13 +30,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_symv {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
BTL_DONT_INLINE Action_symv( int size ):_size(size)
|
||||
{
|
||||
BTL_DONT_INLINE Action_symv(int size) : _size(size) {
|
||||
MESSAGE("Action_symv Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
@ -52,13 +49,11 @@ public :
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
Interface::vector_from_stl(X_ref, X_stl);
|
||||
Interface::vector_from_stl(X, X_stl);
|
||||
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_symv( const Action_symv & )
|
||||
{
|
||||
Action_symv(const Action_symv&) {
|
||||
INFOS("illegal call to Action_symv Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -75,21 +70,14 @@ public :
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "symv_" + Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "symv_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return 2.0*_size*_size;
|
||||
}
|
||||
double nb_op_base(void) { return 2.0 * _size * _size; }
|
||||
|
||||
BTL_DONT_INLINE void initialize(void) {
|
||||
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_vector(B_ref, B, _size);
|
||||
Interface::copy_vector(X_ref, X, _size);
|
||||
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE void calculate(void) {
|
||||
@ -105,18 +93,15 @@ public :
|
||||
|
||||
STL_interface<typename Interface::real_type>::symv(A_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-5) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
@ -130,10 +115,7 @@ private :
|
||||
typename Interface::gene_vector B;
|
||||
typename Interface::gene_vector X;
|
||||
|
||||
|
||||
int _size;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -30,13 +30,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_syr2 {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
BTL_DONT_INLINE Action_syr2( int size ):_size(size)
|
||||
{
|
||||
BTL_DONT_INLINE Action_syr2(int size) : _size(size) {
|
||||
// STL matrix and vector initialization
|
||||
typename Interface::stl_matrix tmp;
|
||||
init_matrix<pseudo_random>(A_stl, _size);
|
||||
@ -54,8 +51,7 @@ public :
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
Action_syr2( const Action_syr2 & )
|
||||
{
|
||||
Action_syr2(const Action_syr2&) {
|
||||
INFOS("illegal call to Action_syr2 Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -72,14 +68,9 @@ public :
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "syr2_" + Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "syr2_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return 2.0*_size*_size;
|
||||
}
|
||||
double nb_op_base(void) { return 2.0 * _size * _size; }
|
||||
|
||||
BTL_DONT_INLINE void initialize(void) {
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
@ -99,18 +90,15 @@ public :
|
||||
|
||||
STL_interface<typename Interface::real_type>::syr2(A_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-3) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
@ -124,10 +112,7 @@ private :
|
||||
typename Interface::gene_vector B;
|
||||
typename Interface::gene_vector X;
|
||||
|
||||
|
||||
int _size;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -29,23 +29,18 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_trisolve {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_trisolve( int size ):_size(size)
|
||||
{
|
||||
Action_trisolve(int size) : _size(size) {
|
||||
MESSAGE("Action_trisolve Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
init_matrix<pseudo_random>(L_stl, _size);
|
||||
init_vector<pseudo_random>(B_stl, _size);
|
||||
init_vector<null_function>(X_stl, _size);
|
||||
for (int j=0; j<_size; ++j)
|
||||
{
|
||||
for (int i=0; i<j; ++i)
|
||||
L_stl[j][i] = 0;
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
for (int i = 0; i < j; ++i) L_stl[j][i] = 0;
|
||||
L_stl[j][j] += 3;
|
||||
}
|
||||
|
||||
@ -57,16 +52,14 @@ public :
|
||||
Interface::vector_from_stl(B, B_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j=0; j<_size; ++j)
|
||||
{
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
_cost += 2 * j + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_trisolve( const Action_trisolve & )
|
||||
{
|
||||
Action_trisolve(const Action_trisolve&) {
|
||||
INFOS("illegal call to Action_trisolve Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
@ -74,7 +67,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_trisolve(void) {
|
||||
|
||||
MESSAGE("Action_trisolve Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -85,22 +77,15 @@ public :
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "trisolve_vector_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "trisolve_vector_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return _cost;
|
||||
}
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) {
|
||||
// Interface::copy_vector(X_ref,X,_size);
|
||||
}
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::trisolve_lower(L,B,X,_size);
|
||||
}
|
||||
inline void calculate(void) { Interface::trisolve_lower(L, B, X, _size); }
|
||||
|
||||
void check_result() {
|
||||
if (_size > 128) return;
|
||||
@ -109,18 +94,15 @@ public :
|
||||
|
||||
STL_interface<typename Interface::real_type>::trisolve_lower(L_stl, B_stl, X_stl, _size);
|
||||
|
||||
typename Interface::real_type error=
|
||||
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
|
||||
|
||||
if (error > 1.e-4) {
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(2);
|
||||
} // else INFOS("CALCULATION OK...residual=" << error);
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix L_stl;
|
||||
typename Interface::stl_vector X_stl;
|
||||
typename Interface::stl_vector B_stl;
|
||||
|
@ -30,13 +30,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_trisolve_matrix {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_trisolve_matrix( int size ):_size(size)
|
||||
{
|
||||
Action_trisolve_matrix(int size) : _size(size) {
|
||||
MESSAGE("Action_trisolve_matrix Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
@ -46,10 +43,8 @@ public :
|
||||
init_matrix<null_function>(X_stl, _size);
|
||||
init_matrix<null_function>(resu_stl, _size);
|
||||
|
||||
for (int j=0; j<_size; ++j)
|
||||
{
|
||||
for (int i=0; i<j; ++i)
|
||||
A_stl[j][i] = 0;
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
for (int i = 0; i < j; ++i) A_stl[j][i] = 0;
|
||||
A_stl[j][j] += 3;
|
||||
}
|
||||
|
||||
@ -64,8 +59,7 @@ public :
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j=0; j<_size; ++j)
|
||||
{
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
_cost += 2 * j + 1;
|
||||
}
|
||||
_cost *= _size;
|
||||
@ -73,8 +67,7 @@ public :
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_trisolve_matrix( const Action_trisolve_matrix & )
|
||||
{
|
||||
Action_trisolve_matrix(const Action_trisolve_matrix&) {
|
||||
INFOS("illegal call to Action_trisolve_matrix Copy Ctor");
|
||||
exit(0);
|
||||
}
|
||||
@ -82,7 +75,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_trisolve_matrix(void) {
|
||||
|
||||
MESSAGE("Action_trisolve_matrix Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -94,34 +86,23 @@ public :
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_matrix(B_ref, _size);
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "trisolve_matrix_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "trisolve_matrix_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return _cost;
|
||||
}
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) {
|
||||
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_matrix(B_ref, B, _size);
|
||||
Interface::copy_matrix(X_ref, X, _size);
|
||||
|
||||
}
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::trisolve_lower_matrix(A,B,X,_size);
|
||||
}
|
||||
inline void calculate(void) { Interface::trisolve_lower_matrix(A, B, X, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
|
||||
// calculation check
|
||||
|
||||
// Interface::matrix_to_stl(X,resu_stl);
|
||||
@ -135,11 +116,9 @@ public :
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// // exit(1);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix B_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
@ -155,11 +134,6 @@ private :
|
||||
|
||||
int _size;
|
||||
double _cost;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -30,13 +30,10 @@ using namespace std;
|
||||
|
||||
template <class Interface>
|
||||
class Action_trmm {
|
||||
|
||||
public:
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_trmm( int size ):_size(size)
|
||||
{
|
||||
Action_trmm(int size) : _size(size) {
|
||||
MESSAGE("Action_trmm Ctor");
|
||||
|
||||
// STL matrix and vector initialization
|
||||
@ -46,10 +43,8 @@ public :
|
||||
init_matrix<null_function>(X_stl, _size);
|
||||
init_matrix<null_function>(resu_stl, _size);
|
||||
|
||||
for (int j=0; j<_size; ++j)
|
||||
{
|
||||
for (int i=0; i<j; ++i)
|
||||
A_stl[j][i] = 0;
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
for (int i = 0; i < j; ++i) A_stl[j][i] = 0;
|
||||
A_stl[j][j] += 3;
|
||||
}
|
||||
|
||||
@ -64,8 +59,7 @@ public :
|
||||
Interface::matrix_from_stl(X, X_stl);
|
||||
|
||||
_cost = 0;
|
||||
for (int j=0; j<_size; ++j)
|
||||
{
|
||||
for (int j = 0; j < _size; ++j) {
|
||||
_cost += 2 * j + 1;
|
||||
}
|
||||
_cost *= _size;
|
||||
@ -73,8 +67,7 @@ public :
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_trmm( const Action_trmm & )
|
||||
{
|
||||
Action_trmm(const Action_trmm&) {
|
||||
INFOS("illegal call to Action_trmm Copy Ctor");
|
||||
exit(0);
|
||||
}
|
||||
@ -82,7 +75,6 @@ public :
|
||||
// Dtor
|
||||
|
||||
~Action_trmm(void) {
|
||||
|
||||
MESSAGE("Action_trmm Dtor");
|
||||
|
||||
// deallocation
|
||||
@ -94,34 +86,23 @@ public :
|
||||
Interface::free_matrix(A_ref, _size);
|
||||
Interface::free_matrix(B_ref, _size);
|
||||
Interface::free_matrix(X_ref, _size);
|
||||
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "trmm_"+Interface::name();
|
||||
}
|
||||
static inline std::string name(void) { return "trmm_" + Interface::name(); }
|
||||
|
||||
double nb_op_base( void ){
|
||||
return _cost;
|
||||
}
|
||||
double nb_op_base(void) { return _cost; }
|
||||
|
||||
inline void initialize(void) {
|
||||
|
||||
Interface::copy_matrix(A_ref, A, _size);
|
||||
Interface::copy_matrix(B_ref, B, _size);
|
||||
Interface::copy_matrix(X_ref, X, _size);
|
||||
|
||||
}
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::trmm(A,B,X,_size);
|
||||
}
|
||||
inline void calculate(void) { Interface::trmm(A, B, X, _size); }
|
||||
|
||||
void check_result(void) {
|
||||
|
||||
// calculation check
|
||||
|
||||
// Interface::matrix_to_stl(X,resu_stl);
|
||||
@ -135,11 +116,9 @@ public :
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// // exit(1);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typename Interface::stl_matrix A_stl;
|
||||
typename Interface::stl_matrix B_stl;
|
||||
typename Interface::stl_matrix X_stl;
|
||||
@ -155,11 +134,6 @@ private :
|
||||
|
||||
int _size;
|
||||
double _cost;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -18,4 +18,3 @@
|
||||
#include "action_rot.hh"
|
||||
|
||||
// #include "action_lu_solve.hh"
|
||||
|
||||
|
@ -28,42 +28,37 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max);
|
||||
double mean_calc(const vector<int> &tab_sizes, const vector<double> &tab_mflops, const int size_min,
|
||||
const int size_max);
|
||||
|
||||
class Lib_Mean {
|
||||
|
||||
public:
|
||||
Lib_Mean(void) : _lib_name(), _mean_in_cache(), _mean_out_of_cache() {
|
||||
MESSAGE("Lib_mean Default Ctor");
|
||||
MESSAGE("!!! should not be used");
|
||||
exit(0);
|
||||
}
|
||||
Lib_Mean(const string & name, const double & mic, const double & moc):_lib_name(name),_mean_in_cache(mic),_mean_out_of_cache(moc){
|
||||
Lib_Mean(const string &name, const double &mic, const double &moc)
|
||||
: _lib_name(name), _mean_in_cache(mic), _mean_out_of_cache(moc) {
|
||||
MESSAGE("Lib_mean Ctor");
|
||||
}
|
||||
Lib_Mean(const Lib_Mean & lm):_lib_name(lm._lib_name),_mean_in_cache(lm._mean_in_cache),_mean_out_of_cache(lm._mean_out_of_cache){
|
||||
Lib_Mean(const Lib_Mean &lm)
|
||||
: _lib_name(lm._lib_name), _mean_in_cache(lm._mean_in_cache), _mean_out_of_cache(lm._mean_out_of_cache) {
|
||||
MESSAGE("Lib_mean Copy Ctor");
|
||||
}
|
||||
~Lib_Mean( void ){
|
||||
MESSAGE("Lib_mean Dtor");
|
||||
}
|
||||
~Lib_Mean(void) { MESSAGE("Lib_mean Dtor"); }
|
||||
|
||||
double _mean_in_cache;
|
||||
double _mean_out_of_cache;
|
||||
string _lib_name;
|
||||
|
||||
bool operator < ( const Lib_Mean &right) const
|
||||
{
|
||||
bool operator<(const Lib_Mean &right) const {
|
||||
// return ( this->_mean_out_of_cache > right._mean_out_of_cache) ;
|
||||
return (this->_mean_in_cache > right._mean_in_cache);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
int main( int argc , char *argv[] )
|
||||
{
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 6) {
|
||||
INFOS("!!! Error ... usage : main what mic Mic moc Moc filename1 finename2...");
|
||||
exit(0);
|
||||
@ -75,11 +70,9 @@ int main( int argc , char *argv[] )
|
||||
int min_out_of_cache = atoi(argv[4]);
|
||||
int max_out_of_cache = atoi(argv[5]);
|
||||
|
||||
|
||||
multiset<Lib_Mean> s_lib_mean;
|
||||
|
||||
for (int i = 6; i < argc; i++) {
|
||||
|
||||
string filename = argv[i];
|
||||
|
||||
INFOS(filename);
|
||||
@ -88,7 +81,6 @@ int main( int argc , char *argv[] )
|
||||
double moc = 0;
|
||||
|
||||
{
|
||||
|
||||
vector<int> tab_sizes;
|
||||
vector<double> tab_mflops;
|
||||
|
||||
@ -100,18 +92,21 @@ int main( int argc , char *argv[] )
|
||||
Lib_Mean cur_lib_mean(filename, mic, moc);
|
||||
|
||||
s_lib_mean.insert(cur_lib_mean);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
cout << "<TABLE BORDER CELLPADDING=2>" << endl;
|
||||
cout << " <TR>" << endl;
|
||||
cout << " <TH ALIGN=CENTER> " << argv[1] << " </TH>" << endl;
|
||||
cout << " <TH ALIGN=CENTER> <a href=""#mean_marker""> in cache <BR> mean perf <BR> Mflops </a></TH>" << endl ;
|
||||
cout << " <TH ALIGN=CENTER> <a href="
|
||||
"#mean_marker"
|
||||
"> in cache <BR> mean perf <BR> Mflops </a></TH>"
|
||||
<< endl;
|
||||
cout << " <TH ALIGN=CENTER> in cache <BR> % best </TH>" << endl;
|
||||
cout << " <TH ALIGN=CENTER> <a href=""#mean_marker""> out of cache <BR> mean perf <BR> Mflops </a></TH>" << endl ;
|
||||
cout << " <TH ALIGN=CENTER> <a href="
|
||||
"#mean_marker"
|
||||
"> out of cache <BR> mean perf <BR> Mflops </a></TH>"
|
||||
<< endl;
|
||||
cout << " <TH ALIGN=CENTER> out of cache <BR> % best </TH>" << endl;
|
||||
cout << " <TH ALIGN=CENTER> details </TH>" << endl;
|
||||
cout << " <TH ALIGN=CENTER> comments </TH>" << endl;
|
||||
@ -120,22 +115,21 @@ int main( int argc , char *argv[] )
|
||||
multiset<Lib_Mean>::iterator is = s_lib_mean.begin();
|
||||
Lib_Mean best(*is);
|
||||
|
||||
|
||||
for (is = s_lib_mean.begin(); is != s_lib_mean.end(); is++) {
|
||||
|
||||
cout << " <TR>" << endl;
|
||||
cout << " <TD> " << is->_lib_name << " </TD>" << endl;
|
||||
cout << " <TD> " << is->_mean_in_cache << " </TD>" << endl;
|
||||
cout << " <TD> " << 100 * (is->_mean_in_cache / best._mean_in_cache) << " </TD>" << endl;
|
||||
cout << " <TD> " << is->_mean_out_of_cache << " </TD>" << endl;
|
||||
cout << " <TD> " << 100 * (is->_mean_out_of_cache / best._mean_out_of_cache) << " </TD>" << endl;
|
||||
cout << " <TD> " <<
|
||||
"<a href=\"#"<<is->_lib_name<<"_"<<argv[1]<<"\">snippet</a>/"
|
||||
"<a href=\"#"<<is->_lib_name<<"_flags\">flags</a> </TD>" << endl ;
|
||||
cout << " <TD> " <<
|
||||
"<a href=\"#"<<is->_lib_name<<"_comments\">click here</a> </TD>" << endl ;
|
||||
cout << " <TD> "
|
||||
<< "<a href=\"#" << is->_lib_name << "_" << argv[1]
|
||||
<< "\">snippet</a>/"
|
||||
"<a href=\"#"
|
||||
<< is->_lib_name << "_flags\">flags</a> </TD>" << endl;
|
||||
cout << " <TD> "
|
||||
<< "<a href=\"#" << is->_lib_name << "_comments\">click here</a> </TD>" << endl;
|
||||
cout << " </TR>" << endl;
|
||||
|
||||
}
|
||||
|
||||
cout << "</TABLE>" << endl;
|
||||
@ -147,26 +141,19 @@ int main( int argc , char *argv[] )
|
||||
}
|
||||
|
||||
output_file.close();
|
||||
|
||||
}
|
||||
|
||||
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){
|
||||
|
||||
double mean_calc(const vector<int> &tab_sizes, const vector<double> &tab_mflops, const int size_min,
|
||||
const int size_max) {
|
||||
int size = tab_sizes.size();
|
||||
int nb_sample = 0;
|
||||
double mean = 0.0;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
||||
|
||||
if ((tab_sizes[i] >= size_min) && (tab_sizes[i] <= size_max)) {
|
||||
|
||||
nb_sample++;
|
||||
mean += tab_mflops[i];
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
if (nb_sample == 0) {
|
||||
@ -176,7 +163,3 @@ double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflop
|
||||
|
||||
return mean / nb_sample;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -28,15 +28,11 @@
|
||||
using namespace std;
|
||||
|
||||
void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops);
|
||||
void regularize_curve(const string & filename,
|
||||
const vector<double> & tab_mflops,
|
||||
const vector<int> & tab_sizes,
|
||||
void regularize_curve(const string &filename, const vector<double> &tab_mflops, const vector<int> &tab_sizes,
|
||||
int start_cut_size, int stop_cut_size);
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int main( int argc , char *argv[] )
|
||||
{
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// input data
|
||||
|
||||
if (argc < 4) {
|
||||
@ -62,52 +58,39 @@ int main( int argc , char *argv[] )
|
||||
// regularizing
|
||||
|
||||
regularize_curve(regularize_filename, tab_mflops, tab_sizes, start_cut_size, stop_cut_size);
|
||||
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Writes a copy of the (size, mflops) curve to `filename`, dropping the
// samples that lie between the two cut sizes.
//
// Output layout, one "size mflops" pair per line:
//   1. every leading sample whose size is < start_cut_size,
//   2. one empty line marking the cut,
//   3. every sample from the first remaining one whose size is
//      >= stop_cut_size up to the end of the data.
//
// filename        path of the file to (over)write.
// tab_mflops      measured performance, parallel to tab_sizes.
// tab_sizes       problem sizes; the scans stop at the first size that reaches
//                 the threshold, so the data is presumably sorted ascending.
// start_cut_size  first size excluded from the output.
// stop_cut_size   first size included again after the cut.
//
// All index accesses are bounded by the shorter of the two vectors, so a curve
// that never reaches one of the thresholds (or an empty curve) no longer reads
// past the end of the data.
void regularize_curve(const std::string &filename, const std::vector<double> &tab_mflops,
                      const std::vector<int> &tab_sizes, int start_cut_size, int stop_cut_size) {
  typedef std::vector<int>::size_type index_type;

  // Only indices valid in both vectors may be touched.
  const index_type count = tab_sizes.size() < tab_mflops.size() ? tab_sizes.size() : tab_mflops.size();

  std::ofstream output_file(filename.c_str(), std::ios::out);

  index_type i = 0;

  // Part kept before the cut.
  for (; i < count && tab_sizes[i] < start_cut_size; ++i) {
    output_file << tab_sizes[i] << " " << tab_mflops[i] << '\n';
  }

  // Blank line separating the two kept parts.
  output_file << '\n';

  // Skip the samples that fall inside the cut.
  while (i < count && tab_sizes[i] < stop_cut_size) {
    ++i;
  }

  // Part kept after the cut.
  for (; i < count; ++i) {
    output_file << tab_sizes[i] << " " << tab_mflops[i] << '\n';
  }

  output_file.close();
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops) {
|
||||
|
||||
ifstream input_file(filename.c_str(), ios::in);
|
||||
|
||||
if (!input_file) {
|
||||
@ -128,4 +111,3 @@ void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<doubl
|
||||
|
||||
input_file.close();
|
||||
}
|
||||
|
||||
|
@ -35,9 +35,7 @@ void centered_smooth_curve(const vector<double> & tab_mflops, vector<double> & s
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int main( int argc , char *argv[] )
|
||||
{
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// input data
|
||||
|
||||
if (argc < 3) {
|
||||
@ -69,36 +67,24 @@ int main( int argc , char *argv[] )
|
||||
// output result
|
||||
|
||||
write_xy_file(smooth_filename, tab_sizes, smooth_tab_mflops);
|
||||
|
||||
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Returns the arithmetic mean of the elements of `data`.
//
// Despite the name, no weights are applied: every element contributes equally.
// VECTOR must provide size(), operator[] and a size_type (std::vector and
// std::deque both qualify); elements must be convertible to double.
//
// For an empty container the result is 0.0 / 0.0, i.e. NaN on IEEE-754
// platforms; callers are expected to pass at least one sample.
template <class VECTOR>
double weighted_mean(const VECTOR &data) {
  double sum = 0.0;

  // Index with the container's own unsigned size_type to avoid a
  // signed/unsigned comparison against data.size().
  for (typename VECTOR::size_type i = 0; i < data.size(); ++i) {
    sum += data[i];
  }

  return sum / double(data.size());
}
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
void smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width) {
|
||||
|
||||
int window_width = 2 * window_half_width + 1;
|
||||
|
||||
int size = tab_mflops.size();
|
||||
@ -106,38 +92,28 @@ void smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab
|
||||
vector<double> sample(window_width);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
||||
for (int j = 0; j < window_width; j++) {
|
||||
|
||||
int shifted_index = i + j - window_half_width;
|
||||
if (shifted_index < 0) shifted_index = 0;
|
||||
if (shifted_index > size - 1) shifted_index = size - 1;
|
||||
sample[j] = tab_mflops[shifted_index];
|
||||
|
||||
}
|
||||
|
||||
smooth_tab_mflops.push_back(weighted_mean(sample));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void centered_smooth_curve(const vector<double> &tab_mflops, vector<double> &smooth_tab_mflops, int window_half_width) {
|
||||
|
||||
int max_window_width = 2 * window_half_width + 1;
|
||||
|
||||
int size = tab_mflops.size();
|
||||
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
||||
deque<double> sample;
|
||||
|
||||
|
||||
sample.push_back(tab_mflops[i]);
|
||||
|
||||
for (int j = 1; j <= window_half_width; j++) {
|
||||
|
||||
int before = i - j;
|
||||
int after = i + j;
|
||||
|
||||
@ -149,32 +125,24 @@ void centered_smooth_curve(const vector<double> & tab_mflops, vector<double> & s
|
||||
}
|
||||
|
||||
smooth_tab_mflops.push_back(weighted_mean(sample));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Writes the curve to `filename` as text, one "size mflops" pair per line,
// replacing any previous content of the file.
//
// filename    path of the file to (over)write.
// tab_sizes   problem sizes (x values).
// tab_mflops  performance values (y values), parallel to tab_sizes.
//
// Only indices valid in both vectors are written, so a length mismatch between
// the two vectors truncates the output instead of reading out of range.
void write_xy_file(const std::string &filename, std::vector<int> &tab_sizes, std::vector<double> &tab_mflops) {
  typedef std::vector<int>::size_type index_type;

  const index_type count = tab_sizes.size() < tab_mflops.size() ? tab_sizes.size() : tab_mflops.size();

  std::ofstream output_file(filename.c_str(), std::ios::out);

  for (index_type i = 0; i < count; ++i) {
    output_file << tab_sizes[i] << " " << tab_mflops[i] << '\n';
  }

  // close() flushes the buffered lines to disk.
  output_file.close();
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void read_xy_file(const string &filename, vector<int> &tab_sizes, vector<double> &tab_mflops) {
|
||||
|
||||
ifstream input_file(filename.c_str(), ios::in);
|
||||
|
||||
if (!input_file) {
|
||||
@ -195,4 +163,3 @@ void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<doubl
|
||||
|
||||
input_file.close();
|
||||
}
|
||||
|
||||
|
@ -38,10 +38,8 @@ extern "C" void cblas_saxpy(const int, const float, const float*, const int, flo
|
||||
using namespace std;
|
||||
|
||||
template <template <class> class Perf_Analyzer, class Action>
|
||||
BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
|
||||
{
|
||||
if (BtlConfig::skipAction(Action::name()))
|
||||
return;
|
||||
BTL_DONT_INLINE void bench(int size_min, int size_max, int nb_point) {
|
||||
if (BtlConfig::skipAction(Action::name())) return;
|
||||
|
||||
string filename = "bench_" + Action::name() + ".dat";
|
||||
|
||||
@ -62,10 +60,10 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
|
||||
|
||||
// loop on matrix size
|
||||
Perf_Analyzer<Action> perf_action;
|
||||
for (int i=nb_point-1;i>=0;i--)
|
||||
{
|
||||
for (int i = nb_point - 1; i >= 0; i--) {
|
||||
// INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
|
||||
std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush;
|
||||
std::cout << " "
|
||||
<< "size = " << tab_sizes[i] << " " << std::flush;
|
||||
|
||||
BTL_DISABLE_SSE_EXCEPTIONS();
|
||||
#ifdef HAVE_MKL
|
||||
@ -78,12 +76,9 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
|
||||
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
|
||||
std::cout << tab_mflops[i];
|
||||
|
||||
if (hasOldResults)
|
||||
{
|
||||
while (oldi>=0 && oldSizes[oldi]>tab_sizes[i])
|
||||
--oldi;
|
||||
if (oldi>=0 && oldSizes[oldi]==tab_sizes[i])
|
||||
{
|
||||
if (hasOldResults) {
|
||||
while (oldi >= 0 && oldSizes[oldi] > tab_sizes[i]) --oldi;
|
||||
if (oldi >= 0 && oldSizes[oldi] == tab_sizes[i]) {
|
||||
if (oldFlops[oldi] < tab_mflops[i])
|
||||
std::cout << "\t > ";
|
||||
else
|
||||
@ -95,45 +90,35 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
|
||||
std::cout << " MFlops (" << nb_point - i << "/" << nb_point << ")" << std::endl;
|
||||
}
|
||||
|
||||
if (!BtlConfig::Instance.overwriteResults)
|
||||
{
|
||||
if (hasOldResults)
|
||||
{
|
||||
if (!BtlConfig::Instance.overwriteResults) {
|
||||
if (hasOldResults) {
|
||||
// merge the two data
|
||||
std::vector<int> newSizes;
|
||||
std::vector<double> newFlops;
|
||||
unsigned int i = 0;
|
||||
unsigned int j = 0;
|
||||
while (i<tab_sizes.size() && j<oldSizes.size())
|
||||
{
|
||||
if (tab_sizes[i] == oldSizes[j])
|
||||
{
|
||||
while (i < tab_sizes.size() && j < oldSizes.size()) {
|
||||
if (tab_sizes[i] == oldSizes[j]) {
|
||||
newSizes.push_back(tab_sizes[i]);
|
||||
newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
|
||||
++i;
|
||||
++j;
|
||||
}
|
||||
else if (tab_sizes[i] < oldSizes[j])
|
||||
{
|
||||
} else if (tab_sizes[i] < oldSizes[j]) {
|
||||
newSizes.push_back(tab_sizes[i]);
|
||||
newFlops.push_back(tab_mflops[i]);
|
||||
++i;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
newSizes.push_back(oldSizes[j]);
|
||||
newFlops.push_back(oldFlops[j]);
|
||||
++j;
|
||||
}
|
||||
}
|
||||
while (i<tab_sizes.size())
|
||||
{
|
||||
while (i < tab_sizes.size()) {
|
||||
newSizes.push_back(tab_sizes[i]);
|
||||
newFlops.push_back(tab_mflops[i]);
|
||||
++i;
|
||||
}
|
||||
while (j<oldSizes.size())
|
||||
{
|
||||
while (j < oldSizes.size()) {
|
||||
newSizes.push_back(oldSizes[j]);
|
||||
newFlops.push_back(oldFlops[j]);
|
||||
++j;
|
||||
@ -145,24 +130,20 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
|
||||
|
||||
// dump the result in a file :
|
||||
dump_xy_file(tab_sizes, tab_mflops, filename);
|
||||
|
||||
}
|
||||
|
||||
// default Perf Analyzer
|
||||
|
||||
// Convenience overload of bench() that fixes the performance analyzer:
// it forwards size_min, size_max and nb_point unchanged to
// bench<Portable_Perf_Analyzer, Action>. The commented-out calls below are the
// alternative analyzers that can be swapped in by hand.
template <class Action>
|
||||
BTL_DONT_INLINE void bench(int size_min, int size_max, int nb_point) {
|
||||
|
||||
// if the rdtsc is not available :
|
||||
bench<Portable_Perf_Analyzer, Action>(size_min, size_max, nb_point);
|
||||
// if the rdtsc is available :
|
||||
// bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
|
||||
|
||||
|
||||
// Only for small problem size. Otherwise it will be too long
|
||||
// bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
|
||||
// bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -47,15 +47,15 @@
|
||||
#ifdef __SSE__
|
||||
#include "xmmintrin.h"
|
||||
// This enables flush to zero (FTZ) and denormals are zero (DAZ) modes:
|
||||
#define BTL_DISABLE_SSE_EXCEPTIONS() { _mm_setcsr(_mm_getcsr() | 0x8040); }
|
||||
#define BTL_DISABLE_SSE_EXCEPTIONS() \
|
||||
{ _mm_setcsr(_mm_getcsr() | 0x8040); }
|
||||
#else
|
||||
#define BTL_DISABLE_SSE_EXCEPTIONS()
|
||||
#endif
|
||||
|
||||
/** Enhanced std::string
|
||||
*/
|
||||
class BtlString : public std::string
|
||||
{
|
||||
class BtlString : public std::string {
|
||||
public:
|
||||
BtlString() : std::string() {}
|
||||
BtlString(const BtlString& str) : std::string(static_cast<const std::string&>(str)) {}
|
||||
@ -64,38 +64,34 @@ public:
|
||||
|
||||
operator const char*() const { return c_str(); }
|
||||
|
||||
void trim( bool left = true, bool right = true )
|
||||
{
|
||||
void trim(bool left = true, bool right = true) {
|
||||
int lspaces, rspaces, len = length(), i;
|
||||
lspaces = rspaces = 0;
|
||||
|
||||
if (left)
|
||||
for (i=0; i<len && (at(i)==' '||at(i)=='\t'||at(i)=='\r'||at(i)=='\n'); ++lspaces,++i);
|
||||
for (i = 0; i < len && (at(i) == ' ' || at(i) == '\t' || at(i) == '\r' || at(i) == '\n'); ++lspaces, ++i)
|
||||
;
|
||||
|
||||
if (right && lspaces < len)
|
||||
for(i=len-1; i>=0 && (at(i)==' '||at(i)=='\t'||at(i)=='\r'||at(i)=='\n'); rspaces++,i--);
|
||||
for (i = len - 1; i >= 0 && (at(i) == ' ' || at(i) == '\t' || at(i) == '\r' || at(i) == '\n'); rspaces++, i--)
|
||||
;
|
||||
|
||||
*this = substr(lspaces, len - lspaces - rspaces);
|
||||
}
|
||||
|
||||
std::vector<BtlString> split( const BtlString& delims = "\t\n ") const
|
||||
{
|
||||
std::vector<BtlString> split(const BtlString& delims = "\t\n ") const {
|
||||
std::vector<BtlString> ret;
|
||||
unsigned int numSplits = 0;
|
||||
size_t start, pos;
|
||||
start = 0;
|
||||
do
|
||||
{
|
||||
do {
|
||||
pos = find_first_of(delims, start);
|
||||
if (pos == start)
|
||||
{
|
||||
if (pos == start) {
|
||||
ret.push_back("");
|
||||
start = pos + 1;
|
||||
}
|
||||
else if (pos == npos)
|
||||
} else if (pos == npos)
|
||||
ret.push_back(substr(start));
|
||||
else
|
||||
{
|
||||
else {
|
||||
ret.push_back(substr(start, pos - start));
|
||||
start = pos + 1;
|
||||
}
|
||||
@ -105,38 +101,28 @@ public:
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool endsWith(const BtlString& str) const
|
||||
{
|
||||
if(str.size()>this->size())
|
||||
return false;
|
||||
bool endsWith(const BtlString& str) const {
|
||||
if (str.size() > this->size()) return false;
|
||||
return this->substr(this->size() - str.size(), str.size()) == str;
|
||||
}
|
||||
bool contains(const BtlString& str) const
|
||||
{
|
||||
return this->find(str)<this->size();
|
||||
}
|
||||
bool beginsWith(const BtlString& str) const
|
||||
{
|
||||
if(str.size()>this->size())
|
||||
return false;
|
||||
bool contains(const BtlString& str) const { return this->find(str) < this->size(); }
|
||||
bool beginsWith(const BtlString& str) const {
|
||||
if (str.size() > this->size()) return false;
|
||||
return this->substr(0, str.size()) == str;
|
||||
}
|
||||
|
||||
BtlString toLowerCase( void )
|
||||
{
|
||||
BtlString toLowerCase(void) {
|
||||
std::transform(begin(), end(), begin(), static_cast<int (*)(int)>(::tolower));
|
||||
return *this;
|
||||
}
|
||||
BtlString toUpperCase( void )
|
||||
{
|
||||
BtlString toUpperCase(void) {
|
||||
std::transform(begin(), end(), begin(), static_cast<int (*)(int)>(::toupper));
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Case insensitive comparison.
|
||||
*/
|
||||
bool isEquiv(const BtlString& str) const
|
||||
{
|
||||
bool isEquiv(const BtlString& str) const {
|
||||
BtlString str0 = *this;
|
||||
str0.toLowerCase();
|
||||
BtlString str1 = str;
|
||||
@ -147,64 +133,45 @@ public:
|
||||
/** Decompose the current string as a path and a file.
|
||||
For instance: "dir1/dir2/file.ext" leads to path="dir1/dir2/" and filename="file.ext"
|
||||
*/
|
||||
void decomposePathAndFile(BtlString& path, BtlString& filename) const
|
||||
{
|
||||
void decomposePathAndFile(BtlString& path, BtlString& filename) const {
|
||||
std::vector<BtlString> elements = this->split("/\\");
|
||||
path = "";
|
||||
filename = elements.back();
|
||||
elements.pop_back();
|
||||
if (this->at(0)=='/')
|
||||
path = "/";
|
||||
for (unsigned int i=0 ; i<elements.size() ; ++i)
|
||||
path += elements[i] + "/";
|
||||
if (this->at(0) == '/') path = "/";
|
||||
for (unsigned int i = 0; i < elements.size(); ++i) path += elements[i] + "/";
|
||||
}
|
||||
};
|
||||
|
||||
class BtlConfig
|
||||
{
|
||||
class BtlConfig {
|
||||
public:
|
||||
BtlConfig()
|
||||
: overwriteResults(false), checkResults(true), realclock(false), tries(DEFAULT_NB_TRIES)
|
||||
{
|
||||
BtlConfig() : overwriteResults(false), checkResults(true), realclock(false), tries(DEFAULT_NB_TRIES) {
|
||||
char* _config;
|
||||
_config = getenv("BTL_CONFIG");
|
||||
if (_config!=NULL)
|
||||
{
|
||||
if (_config != NULL) {
|
||||
std::vector<BtlString> config = BtlString(_config).split(" \t\n");
|
||||
for (unsigned int i = 0; i<config.size(); i++)
|
||||
{
|
||||
if (config[i].beginsWith("-a"))
|
||||
{
|
||||
if (i+1==config.size())
|
||||
{
|
||||
for (unsigned int i = 0; i < config.size(); i++) {
|
||||
if (config[i].beginsWith("-a")) {
|
||||
if (i + 1 == config.size()) {
|
||||
std::cerr << "error processing option: " << config[i] << "\n";
|
||||
exit(2);
|
||||
}
|
||||
Instance.m_selectedActionNames = config[i + 1].split(":");
|
||||
|
||||
i += 1;
|
||||
}
|
||||
else if (config[i].beginsWith("-t"))
|
||||
{
|
||||
if (i+1==config.size())
|
||||
{
|
||||
} else if (config[i].beginsWith("-t")) {
|
||||
if (i + 1 == config.size()) {
|
||||
std::cerr << "error processing option: " << config[i] << "\n";
|
||||
exit(2);
|
||||
}
|
||||
Instance.tries = atoi(config[i + 1].c_str());
|
||||
|
||||
i += 1;
|
||||
}
|
||||
else if (config[i].beginsWith("--overwrite"))
|
||||
{
|
||||
} else if (config[i].beginsWith("--overwrite")) {
|
||||
Instance.overwriteResults = true;
|
||||
}
|
||||
else if (config[i].beginsWith("--nocheck"))
|
||||
{
|
||||
} else if (config[i].beginsWith("--nocheck")) {
|
||||
Instance.checkResults = false;
|
||||
}
|
||||
else if (config[i].beginsWith("--real"))
|
||||
{
|
||||
} else if (config[i].beginsWith("--real")) {
|
||||
Instance.realclock = true;
|
||||
}
|
||||
}
|
||||
@ -213,15 +180,12 @@ public:
|
||||
BTL_DISABLE_SSE_EXCEPTIONS();
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE static bool skipAction(const std::string& _name)
|
||||
{
|
||||
if (Instance.m_selectedActionNames.empty())
|
||||
return false;
|
||||
BTL_DONT_INLINE static bool skipAction(const std::string& _name) {
|
||||
if (Instance.m_selectedActionNames.empty()) return false;
|
||||
|
||||
BtlString name(_name);
|
||||
for (unsigned int i = 0; i < Instance.m_selectedActionNames.size(); ++i)
|
||||
if (name.contains(Instance.m_selectedActionNames[i]))
|
||||
return false;
|
||||
if (name.contains(Instance.m_selectedActionNames[i])) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -236,7 +200,6 @@ protected:
|
||||
std::vector<BtlString> m_selectedActionNames;
|
||||
};
|
||||
|
||||
#define BTL_MAIN \
|
||||
BtlConfig BtlConfig::Instance
|
||||
#define BTL_MAIN BtlConfig BtlConfig::Instance
|
||||
|
||||
#endif // BTL_HH
|
||||
|
@ -20,35 +20,16 @@
|
||||
#ifndef INIT_FUNCTION_HH
|
||||
#define INIT_FUNCTION_HH
|
||||
|
||||
double simple_function(int index)
|
||||
{
|
||||
return index;
|
||||
}
|
||||
double simple_function(int index) { return index; }
|
||||
|
||||
double simple_function(int index_i, int index_j)
|
||||
{
|
||||
return index_i+index_j;
|
||||
}
|
||||
double simple_function(int index_i, int index_j) { return index_i + index_j; }
|
||||
|
||||
double pseudo_random(int /*index*/)
|
||||
{
|
||||
return std::rand()/double(RAND_MAX);
|
||||
}
|
||||
double pseudo_random(int /*index*/) { return std::rand() / double(RAND_MAX); }
|
||||
|
||||
double pseudo_random(int /*index_i*/, int /*index_j*/)
|
||||
{
|
||||
return std::rand()/double(RAND_MAX);
|
||||
}
|
||||
double pseudo_random(int /*index_i*/, int /*index_j*/) { return std::rand() / double(RAND_MAX); }
|
||||
|
||||
double null_function(int /*index*/) { return 0.0; }
|
||||
|
||||
double null_function(int /*index*/)
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
double null_function(int /*index_i*/, int /*index_j*/)
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
double null_function(int /*index_i*/, int /*index_j*/) { return 0.0; }
|
||||
|
||||
#endif
|
||||
|
@ -26,7 +26,6 @@
|
||||
// value_type defined
|
||||
template <double init_function(int, int), class Vector>
|
||||
BTL_DONT_INLINE void init_row(Vector& X, int size, int row) {
|
||||
|
||||
X.resize(size);
|
||||
|
||||
for (unsigned int j = 0; j < X.size(); j++) {
|
||||
@ -34,7 +33,6 @@ BTL_DONT_INLINE void init_row(Vector & X, int size, int row){
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Matrix is a Vector of Vector
|
||||
// The Matrix class must satisfy the following part of STL vector concept :
|
||||
// resize() method
|
||||
@ -50,8 +48,7 @@ BTL_DONT_INLINE void init_matrix(Vector & A, int size){
|
||||
template <double init_function(int, int), class Matrix>
|
||||
BTL_DONT_INLINE void init_matrix_symm(Matrix& A, int size) {
|
||||
A.resize(size);
|
||||
for (unsigned int row=0; row<A.size() ; row++)
|
||||
A[row].resize(size);
|
||||
for (unsigned int row = 0; row < A.size(); row++) A[row].resize(size);
|
||||
for (unsigned int row = 0; row < A.size(); row++) {
|
||||
A[row][row] = init_function(row, row);
|
||||
for (unsigned int col = 0; col < row; col++) {
|
||||
|
@ -26,7 +26,6 @@
|
||||
// value_type defined
|
||||
template <double init_function(int), class Vector>
|
||||
void init_vector(Vector& X, int size) {
|
||||
|
||||
X.resize(size);
|
||||
|
||||
for (unsigned int i = 0; i < X.size(); i++) {
|
||||
|
@ -32,12 +32,9 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
template <template <class> class Perf_Analyzer, template <class> class Action, template <class, int> class Interface>
|
||||
BTL_DONT_INLINE void bench_static(void)
|
||||
{
|
||||
if (BtlConfig::skipAction(Action<Interface<REAL_TYPE,10> >::name()))
|
||||
return;
|
||||
BTL_DONT_INLINE void bench_static(void) {
|
||||
if (BtlConfig::skipAction(Action<Interface<REAL_TYPE, 10> >::name())) return;
|
||||
|
||||
string filename = "bench_" + Action<Interface<REAL_TYPE, 10> >::name() + ".dat";
|
||||
|
||||
@ -55,26 +52,10 @@ BTL_DONT_INLINE void bench_static(void)
|
||||
|
||||
// default Perf Analyzer
|
||||
template <template <class> class Action, template <class, int> class Interface>
|
||||
BTL_DONT_INLINE void bench_static(void)
|
||||
{
|
||||
BTL_DONT_INLINE void bench_static(void) {
|
||||
bench_static<Portable_Perf_Analyzer, Action, Interface>();
|
||||
// bench_static<Mixed_Perf_Analyzer,Action,Interface>();
|
||||
// bench_static<X86_Perf_Analyzer,Action,Interface>();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -24,15 +24,12 @@
|
||||
#include "function_time.hh"
|
||||
|
||||
template <class Action>
|
||||
double bench_fixed_size(int size, unsigned long long & nb_calc,unsigned long long & nb_init)
|
||||
{
|
||||
|
||||
double bench_fixed_size(int size, unsigned long long& nb_calc, unsigned long long& nb_init) {
|
||||
Action action(size);
|
||||
|
||||
double time_baseline = time_init(nb_init, action);
|
||||
|
||||
while (time_baseline < MIN_TIME) {
|
||||
|
||||
// INFOS("nb_init="<<nb_init);
|
||||
// INFOS("time_baseline="<<time_baseline);
|
||||
nb_init *= 2;
|
||||
@ -44,7 +41,6 @@ double bench_fixed_size(int size, unsigned long long & nb_calc,unsigned long lo
|
||||
double time_action = time_calculate(nb_calc, action);
|
||||
|
||||
while (time_action < MIN_TIME) {
|
||||
|
||||
nb_calc *= 2;
|
||||
time_action = time_calculate(nb_calc, action);
|
||||
}
|
||||
@ -52,7 +48,6 @@ double bench_fixed_size(int size, unsigned long long & nb_calc,unsigned long lo
|
||||
INFOS("nb_init=" << nb_init);
|
||||
INFOS("nb_calc=" << nb_calc);
|
||||
|
||||
|
||||
time_action = time_action / (double(nb_calc));
|
||||
|
||||
action.check_result();
|
||||
@ -60,7 +55,6 @@ double bench_fixed_size(int size, unsigned long long & nb_calc,unsigned long lo
|
||||
time_action = time_action - time_baseline;
|
||||
|
||||
return action.nb_op_base() / (time_action * 1000000.0);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -25,10 +25,10 @@ using namespace std;
|
||||
|
||||
// recursive generation of statically defined matrix and vector sizes
|
||||
|
||||
template <int SIZE,template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface>
|
||||
template <int SIZE, template <class> class Perf_Analyzer, template <class> class Action,
|
||||
template <class, int> class Interface>
|
||||
struct static_size_generator {
|
||||
static void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
|
||||
{
|
||||
static void go(vector<double>& tab_sizes, vector<double>& tab_mflops) {
|
||||
tab_sizes.push_back(SIZE);
|
||||
std::cout << tab_sizes.back() << " \t" << std::flush;
|
||||
Perf_Analyzer<Action<Interface<REAL_TYPE, SIZE> > > perf_action;
|
||||
@ -42,8 +42,7 @@ struct static_size_generator{
|
||||
|
||||
template <template <class> class Perf_Analyzer, template <class> class Action, template <class, int> class Interface>
|
||||
struct static_size_generator<1, Perf_Analyzer, Action, Interface> {
|
||||
static void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
|
||||
{
|
||||
static void go(vector<double>& tab_sizes, vector<double>& tab_mflops) {
|
||||
tab_sizes.push_back(1);
|
||||
Perf_Analyzer<Action<Interface<REAL_TYPE, 1> > > perf_action;
|
||||
tab_mflops.push_back(perf_action.eval_mflops(1));
|
||||
@ -51,7 +50,3 @@ struct static_size_generator<1,Perf_Analyzer,Action,Interface>{
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -26,28 +26,21 @@
|
||||
template <class ACTION>
|
||||
class STL_Perf_Analyzer {
|
||||
public:
|
||||
STL_Perf_Analyzer(unsigned long long nb_sample=DEFAULT_NB_SAMPLE):_nb_sample(nb_sample),_chronos()
|
||||
{
|
||||
STL_Perf_Analyzer(unsigned long long nb_sample = DEFAULT_NB_SAMPLE) : _nb_sample(nb_sample), _chronos() {
|
||||
MESSAGE("STL_Perf_Analyzer Ctor");
|
||||
};
|
||||
STL_Perf_Analyzer(const STL_Perf_Analyzer&) {
|
||||
INFOS("Copy Ctor not implemented");
|
||||
exit(0);
|
||||
};
|
||||
~STL_Perf_Analyzer( void ){
|
||||
MESSAGE("STL_Perf_Analyzer Dtor");
|
||||
};
|
||||
|
||||
|
||||
inline double eval_mflops(int size)
|
||||
{
|
||||
~STL_Perf_Analyzer(void) { MESSAGE("STL_Perf_Analyzer Dtor"); };
|
||||
|
||||
inline double eval_mflops(int size) {
|
||||
ACTION action(size);
|
||||
|
||||
_chronos.start_baseline(_nb_sample);
|
||||
|
||||
do {
|
||||
|
||||
action.initialize();
|
||||
} while (_chronos.check());
|
||||
|
||||
@ -67,16 +60,11 @@ public:
|
||||
|
||||
return action.nb_op_base() / (corrected_time * 1000000.0);
|
||||
// return action.nb_op_base()/(calculate_time*1000000.0);
|
||||
|
||||
}
|
||||
private:
|
||||
|
||||
private:
|
||||
STL_Timer _chronos;
|
||||
unsigned long long _nb_sample;
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -39,14 +39,12 @@ public:
|
||||
initial = time(0);
|
||||
};
|
||||
// Start a series of r trials to determine baseline time:
|
||||
void start_baseline(unsigned int r)
|
||||
{
|
||||
void start_baseline(unsigned int r) {
|
||||
baseline = true;
|
||||
start(r);
|
||||
}
|
||||
// Returns true if the trials have been completed, else false
|
||||
bool check()
|
||||
{
|
||||
bool check() {
|
||||
++count;
|
||||
final = time(0);
|
||||
if (initial < final) {
|
||||
@ -57,11 +55,11 @@ public:
|
||||
return (iterations.size() < reps);
|
||||
};
|
||||
// Returns the results for external use
|
||||
double get_time( void )
|
||||
{
|
||||
double get_time(void) {
|
||||
sort(iterations.begin(), iterations.end());
|
||||
return 1.0 / iterations[reps / 2];
|
||||
};
|
||||
|
||||
private:
|
||||
unsigned int reps; // Number of trials
|
||||
// For storing loop iterations of a trial
|
||||
@ -75,4 +73,3 @@ private:
|
||||
// For recording the baseline time
|
||||
double baseline_time;
|
||||
};
|
||||
|
||||
|
@ -25,33 +25,24 @@
|
||||
|
||||
// choose portable perf analyzer for long calculations and x86 analyser for short ones
|
||||
|
||||
|
||||
template <class Action>
|
||||
class Mixed_Perf_Analyzer {
|
||||
|
||||
public:
|
||||
Mixed_Perf_Analyzer( void ):_x86pa(),_ppa(),_use_ppa(true)
|
||||
{
|
||||
MESSAGE("Mixed_Perf_Analyzer Ctor");
|
||||
};
|
||||
Mixed_Perf_Analyzer(void) : _x86pa(), _ppa(), _use_ppa(true) { MESSAGE("Mixed_Perf_Analyzer Ctor"); };
|
||||
Mixed_Perf_Analyzer(const Mixed_Perf_Analyzer&) {
|
||||
INFOS("Copy Ctor not implemented");
|
||||
exit(0);
|
||||
};
|
||||
~Mixed_Perf_Analyzer( void ){
|
||||
MESSAGE("Mixed_Perf_Analyzer Dtor");
|
||||
};
|
||||
|
||||
|
||||
inline double eval_mflops(int size)
|
||||
{
|
||||
~Mixed_Perf_Analyzer(void) { MESSAGE("Mixed_Perf_Analyzer Dtor"); };
|
||||
|
||||
inline double eval_mflops(int size) {
|
||||
double result = 0.0;
|
||||
if (_use_ppa) {
|
||||
result = _ppa.eval_mflops(size);
|
||||
if (_ppa.get_nb_calc()>DEFAULT_NB_SAMPLE){_use_ppa=false;}
|
||||
if (_ppa.get_nb_calc() > DEFAULT_NB_SAMPLE) {
|
||||
_use_ppa = false;
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
result = _x86pa.eval_mflops(size);
|
||||
}
|
||||
|
||||
@ -59,15 +50,9 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
Portable_Perf_Analyzer<Action> _ppa;
|
||||
X86_Perf_Analyzer<Action> _x86pa;
|
||||
bool _use_ppa;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -27,34 +27,29 @@
|
||||
template <class Action>
|
||||
class Portable_Perf_Analyzer {
|
||||
public:
|
||||
Portable_Perf_Analyzer( ):_nb_calc(0), m_time_action(0), _chronos(){
|
||||
MESSAGE("Portable_Perf_Analyzer Ctor");
|
||||
};
|
||||
Portable_Perf_Analyzer() : _nb_calc(0), m_time_action(0), _chronos() { MESSAGE("Portable_Perf_Analyzer Ctor"); };
|
||||
Portable_Perf_Analyzer(const Portable_Perf_Analyzer&) {
|
||||
INFOS("Copy Ctor not implemented");
|
||||
exit(0);
|
||||
};
|
||||
~Portable_Perf_Analyzer(){
|
||||
MESSAGE("Portable_Perf_Analyzer Dtor");
|
||||
};
|
||||
~Portable_Perf_Analyzer() { MESSAGE("Portable_Perf_Analyzer Dtor"); };
|
||||
|
||||
BTL_DONT_INLINE double eval_mflops(int size)
|
||||
{
|
||||
BTL_DONT_INLINE double eval_mflops(int size) {
|
||||
Action action(size);
|
||||
|
||||
// action.initialize();
|
||||
// time_action = time_calculate(action);
|
||||
while (m_time_action < MIN_TIME)
|
||||
{
|
||||
if(_nb_calc==0) _nb_calc = 1;
|
||||
else _nb_calc *= 2;
|
||||
while (m_time_action < MIN_TIME) {
|
||||
if (_nb_calc == 0)
|
||||
_nb_calc = 1;
|
||||
else
|
||||
_nb_calc *= 2;
|
||||
action.initialize();
|
||||
m_time_action = time_calculate(action);
|
||||
}
|
||||
|
||||
// optimize
|
||||
for (int i=1; i<BtlConfig::Instance.tries; ++i)
|
||||
{
|
||||
for (int i = 1; i < BtlConfig::Instance.tries; ++i) {
|
||||
Action _action(size);
|
||||
std::cout << " " << _action.nb_op_base() * _nb_calc / (m_time_action * 1e6) << " ";
|
||||
_action.initialize();
|
||||
@ -64,8 +59,7 @@ public:
|
||||
double time_action = m_time_action / (double(_nb_calc));
|
||||
|
||||
// check
|
||||
if (BtlConfig::Instance.checkResults && size<128)
|
||||
{
|
||||
if (BtlConfig::Instance.checkResults && size < 128) {
|
||||
action.initialize();
|
||||
action.calculate();
|
||||
action.check_result();
|
||||
@ -73,31 +67,23 @@ public:
|
||||
return action.nb_op_base() / (time_action * 1e6);
|
||||
}
|
||||
|
||||
BTL_DONT_INLINE double time_calculate(Action & action)
|
||||
{
|
||||
BTL_DONT_INLINE double time_calculate(Action& action) {
|
||||
// time measurement
|
||||
action.calculate();
|
||||
_chronos.start();
|
||||
for (unsigned int ii=0;ii<_nb_calc;ii++)
|
||||
{
|
||||
for (unsigned int ii = 0; ii < _nb_calc; ii++) {
|
||||
action.calculate();
|
||||
}
|
||||
_chronos.stop();
|
||||
return _chronos.user_time();
|
||||
}
|
||||
|
||||
unsigned long long get_nb_calc()
|
||||
{
|
||||
return _nb_calc;
|
||||
}
|
||||
|
||||
unsigned long long get_nb_calc() { return _nb_calc; }
|
||||
|
||||
private:
|
||||
unsigned long long _nb_calc;
|
||||
double m_time_action;
|
||||
Portable_Timer _chronos;
|
||||
|
||||
};
|
||||
|
||||
#endif //_PORTABLE_PERF_ANALYZER_HH
|
||||
|
||||
|
@ -26,22 +26,14 @@
|
||||
template <class Action>
|
||||
class Portable_Perf_Analyzer {
|
||||
public:
|
||||
Portable_Perf_Analyzer( void ):_nb_calc(1),_nb_init(1),_chronos(){
|
||||
MESSAGE("Portable_Perf_Analyzer Ctor");
|
||||
};
|
||||
Portable_Perf_Analyzer(void) : _nb_calc(1), _nb_init(1), _chronos() { MESSAGE("Portable_Perf_Analyzer Ctor"); };
|
||||
Portable_Perf_Analyzer(const Portable_Perf_Analyzer&) {
|
||||
INFOS("Copy Ctor not implemented");
|
||||
exit(0);
|
||||
};
|
||||
~Portable_Perf_Analyzer( void ){
|
||||
MESSAGE("Portable_Perf_Analyzer Dtor");
|
||||
};
|
||||
|
||||
|
||||
|
||||
inline double eval_mflops(int size)
|
||||
{
|
||||
~Portable_Perf_Analyzer(void) { MESSAGE("Portable_Perf_Analyzer Dtor"); };
|
||||
|
||||
inline double eval_mflops(int size) {
|
||||
Action action(size);
|
||||
|
||||
// double time_baseline = time_init(action);
|
||||
@ -58,15 +50,13 @@ public:
|
||||
// time_baseline = time_baseline/(double(_nb_init));
|
||||
|
||||
double time_action = time_calculate(action);
|
||||
while (time_action < MIN_TIME)
|
||||
{
|
||||
while (time_action < MIN_TIME) {
|
||||
_nb_calc *= 2;
|
||||
time_action = time_calculate(action);
|
||||
}
|
||||
|
||||
// optimize
|
||||
for (int i=1; i<NB_TRIES; ++i)
|
||||
time_action = std::min(time_action, time_calculate(action));
|
||||
for (int i = 1; i < NB_TRIES; ++i) time_action = std::min(time_action, time_calculate(action));
|
||||
|
||||
// INFOS("size="<<size);
|
||||
// INFOS("_nb_init="<<_nb_init);
|
||||
@ -76,14 +66,10 @@ public:
|
||||
|
||||
action.check_result();
|
||||
|
||||
|
||||
double time_baseline = time_init(action);
|
||||
for (int i=1; i<NB_TRIES; ++i)
|
||||
time_baseline = std::min(time_baseline, time_init(action));
|
||||
for (int i = 1; i < NB_TRIES; ++i) time_baseline = std::min(time_baseline, time_init(action));
|
||||
time_baseline = time_baseline / (double(_nb_init));
|
||||
|
||||
|
||||
|
||||
// INFOS("time_baseline="<<time_baseline);
|
||||
// INFOS("time_action="<<time_action);
|
||||
|
||||
@ -94,23 +80,18 @@ public:
|
||||
return action.nb_op_base() / (time_action * 1000000.0);
|
||||
}
|
||||
|
||||
inline double time_init(Action & action)
|
||||
{
|
||||
inline double time_init(Action& action) {
|
||||
// time measurement
|
||||
_chronos.start();
|
||||
for (int ii=0; ii<_nb_init; ii++)
|
||||
action.initialize();
|
||||
for (int ii = 0; ii < _nb_init; ii++) action.initialize();
|
||||
_chronos.stop();
|
||||
return _chronos.user_time();
|
||||
}
|
||||
|
||||
|
||||
inline double time_calculate(Action & action)
|
||||
{
|
||||
inline double time_calculate(Action& action) {
|
||||
// time measurement
|
||||
_chronos.start();
|
||||
for (int ii=0;ii<_nb_calc;ii++)
|
||||
{
|
||||
for (int ii = 0; ii < _nb_calc; ii++) {
|
||||
action.initialize();
|
||||
action.calculate();
|
||||
}
|
||||
@ -118,17 +99,12 @@ public:
|
||||
return _chronos.user_time();
|
||||
}
|
||||
|
||||
unsigned long long get_nb_calc( void )
|
||||
{
|
||||
return _nb_calc;
|
||||
}
|
||||
|
||||
unsigned long long get_nb_calc(void) { return _nb_calc; }
|
||||
|
||||
private:
|
||||
unsigned long long _nb_calc;
|
||||
unsigned long long _nb_init;
|
||||
Portable_Timer _chronos;
|
||||
|
||||
};
|
||||
|
||||
#endif //_PORTABLE_PERF_ANALYZER_HH
|
||||
|
@ -27,10 +27,8 @@
|
||||
|
||||
#include <time.h>
|
||||
|
||||
|
||||
#define USEC_IN_SEC 1000000
|
||||
|
||||
|
||||
// timer -------------------------------------------------------------------//
|
||||
|
||||
// A timer object measures CPU time.
|
||||
@ -44,18 +42,14 @@
|
||||
#define hr_timer
|
||||
#endif*/
|
||||
|
||||
class Portable_Timer
|
||||
{
|
||||
class Portable_Timer {
|
||||
public:
|
||||
|
||||
typedef struct {
|
||||
LARGE_INTEGER start;
|
||||
LARGE_INTEGER stop;
|
||||
} stopWatch;
|
||||
|
||||
|
||||
Portable_Timer()
|
||||
{
|
||||
Portable_Timer() {
|
||||
startVal.QuadPart = 0;
|
||||
stopVal.QuadPart = 0;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
@ -73,58 +67,38 @@
|
||||
|
||||
double user_time() { return elapsed(); }
|
||||
|
||||
|
||||
private:
|
||||
|
||||
double LIToSecs(LARGE_INTEGER& L) {
|
||||
return ((double)L.QuadPart /(double)frequency.QuadPart) ;
|
||||
}
|
||||
double LIToSecs(LARGE_INTEGER& L) { return ((double)L.QuadPart / (double)frequency.QuadPart); }
|
||||
|
||||
LARGE_INTEGER startVal;
|
||||
LARGE_INTEGER stopVal;
|
||||
LARGE_INTEGER frequency;
|
||||
|
||||
|
||||
}; // Portable_Timer
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
#include <CoreServices/CoreServices.h>
|
||||
#include <mach/mach_time.h>
|
||||
|
||||
|
||||
class Portable_Timer
|
||||
{
|
||||
class Portable_Timer {
|
||||
public:
|
||||
Portable_Timer() {}
|
||||
|
||||
Portable_Timer()
|
||||
{
|
||||
void start() {
|
||||
m_start_time = double(mach_absolute_time()) * 1e-9;
|
||||
;
|
||||
}
|
||||
|
||||
void start()
|
||||
{
|
||||
m_start_time = double(mach_absolute_time())*1e-9;;
|
||||
|
||||
void stop() {
|
||||
m_stop_time = double(mach_absolute_time()) * 1e-9;
|
||||
;
|
||||
}
|
||||
|
||||
void stop()
|
||||
{
|
||||
m_stop_time = double(mach_absolute_time())*1e-9;;
|
||||
|
||||
}
|
||||
|
||||
double elapsed()
|
||||
{
|
||||
return user_time();
|
||||
}
|
||||
|
||||
double user_time()
|
||||
{
|
||||
return m_stop_time - m_start_time;
|
||||
}
|
||||
double elapsed() { return user_time(); }
|
||||
|
||||
double user_time() { return m_stop_time - m_start_time; }
|
||||
|
||||
private:
|
||||
|
||||
double m_stop_time, m_start_time;
|
||||
|
||||
}; // Portable_Timer (Apple)
|
||||
@ -136,47 +110,29 @@ private:
|
||||
#include <unistd.h>
|
||||
#include <sys/times.h>
|
||||
|
||||
class Portable_Timer
|
||||
{
|
||||
class Portable_Timer {
|
||||
public:
|
||||
Portable_Timer() { m_clkid = BtlConfig::Instance.realclock ? CLOCK_REALTIME : CLOCK_PROCESS_CPUTIME_ID; }
|
||||
|
||||
Portable_Timer()
|
||||
{
|
||||
m_clkid = BtlConfig::Instance.realclock ? CLOCK_REALTIME : CLOCK_PROCESS_CPUTIME_ID;
|
||||
}
|
||||
Portable_Timer(int clkid) : m_clkid(clkid) {}
|
||||
|
||||
Portable_Timer(int clkid) : m_clkid(clkid)
|
||||
{}
|
||||
|
||||
void start()
|
||||
{
|
||||
void start() {
|
||||
timespec ts;
|
||||
clock_gettime(m_clkid, &ts);
|
||||
m_start_time = double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
|
||||
|
||||
}
|
||||
|
||||
void stop()
|
||||
{
|
||||
void stop() {
|
||||
timespec ts;
|
||||
clock_gettime(m_clkid, &ts);
|
||||
m_stop_time = double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
|
||||
|
||||
}
|
||||
|
||||
double elapsed()
|
||||
{
|
||||
return user_time();
|
||||
}
|
||||
|
||||
double user_time()
|
||||
{
|
||||
return m_stop_time - m_start_time;
|
||||
}
|
||||
double elapsed() { return user_time(); }
|
||||
|
||||
double user_time() { return m_stop_time - m_start_time; }
|
||||
|
||||
private:
|
||||
|
||||
int m_clkid;
|
||||
double m_stop_time, m_start_time;
|
||||
|
||||
|
@ -26,8 +26,7 @@
|
||||
template <class ACTION>
|
||||
class X86_Perf_Analyzer {
|
||||
public:
|
||||
X86_Perf_Analyzer( unsigned long long nb_sample=DEFAULT_NB_SAMPLE):_nb_sample(nb_sample),_chronos()
|
||||
{
|
||||
X86_Perf_Analyzer(unsigned long long nb_sample = DEFAULT_NB_SAMPLE) : _nb_sample(nb_sample), _chronos() {
|
||||
MESSAGE("X86_Perf_Analyzer Ctor");
|
||||
_chronos.find_frequency();
|
||||
};
|
||||
@ -35,14 +34,9 @@ public:
|
||||
INFOS("Copy Ctor not implemented");
|
||||
exit(0);
|
||||
};
|
||||
~X86_Perf_Analyzer( void ){
|
||||
MESSAGE("X86_Perf_Analyzer Dtor");
|
||||
};
|
||||
|
||||
|
||||
inline double eval_mflops(int size)
|
||||
{
|
||||
~X86_Perf_Analyzer(void) { MESSAGE("X86_Perf_Analyzer Dtor"); };
|
||||
|
||||
inline double eval_mflops(int size) {
|
||||
ACTION action(size);
|
||||
|
||||
int nb_loop = 5;
|
||||
@ -50,11 +44,9 @@ public:
|
||||
double baseline_time = 0.0;
|
||||
|
||||
for (int j = 0; j < nb_loop; j++) {
|
||||
|
||||
_chronos.clear();
|
||||
|
||||
for(int i=0 ; i < _nb_sample ; i++)
|
||||
{
|
||||
for (int i = 0; i < _nb_sample; i++) {
|
||||
_chronos.start();
|
||||
action.initialize();
|
||||
action.calculate();
|
||||
@ -68,22 +60,18 @@ public:
|
||||
|
||||
_chronos.clear();
|
||||
|
||||
for(int i=0 ; i < _nb_sample ; i++)
|
||||
{
|
||||
for (int i = 0; i < _nb_sample; i++) {
|
||||
_chronos.start();
|
||||
action.initialize();
|
||||
_chronos.stop();
|
||||
_chronos.add_get_click();
|
||||
|
||||
}
|
||||
|
||||
baseline_time += double(_chronos.get_shortest_clicks()) / _chronos.frequency();
|
||||
|
||||
}
|
||||
|
||||
double corrected_time = (calculate_time - baseline_time) / double(nb_loop);
|
||||
|
||||
|
||||
// INFOS("_nb_sample="<<_nb_sample);
|
||||
// INFOS("baseline_time="<<baseline_time);
|
||||
// INFOS("calculate_time="<<calculate_time);
|
||||
@ -96,13 +84,8 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
X86_Timer _chronos;
|
||||
unsigned long long _nb_sample;
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -40,49 +40,21 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
class X86_Timer {
|
||||
|
||||
public:
|
||||
X86_Timer(void) : _frequency(FREQUENCY), _nb_sample(0) { MESSAGE("X86_Timer Default Ctor"); }
|
||||
|
||||
X86_Timer( void ):_frequency(FREQUENCY),_nb_sample(0)
|
||||
{
|
||||
MESSAGE("X86_Timer Default Ctor");
|
||||
}
|
||||
inline void start(void) { rdtsc(_click_start.n32[0], _click_start.n32[1]); }
|
||||
|
||||
inline void start( void ){
|
||||
inline void stop(void) { rdtsc(_click_stop.n32[0], _click_stop.n32[1]); }
|
||||
|
||||
rdtsc(_click_start.n32[0],_click_start.n32[1]);
|
||||
inline double frequency(void) { return _frequency; }
|
||||
|
||||
}
|
||||
double get_elapsed_time_in_second(void) { return (_click_stop.n64 - _click_start.n64) / double(FREQUENCY); }
|
||||
|
||||
|
||||
inline void stop( void ){
|
||||
|
||||
rdtsc(_click_stop.n32[0],_click_stop.n32[1]);
|
||||
|
||||
}
|
||||
|
||||
|
||||
inline double frequency( void ){
|
||||
return _frequency;
|
||||
}
|
||||
|
||||
double get_elapsed_time_in_second( void ){
|
||||
|
||||
return (_click_stop.n64-_click_start.n64)/double(FREQUENCY);
|
||||
|
||||
|
||||
}
|
||||
|
||||
unsigned long long get_click( void ){
|
||||
|
||||
return (_click_stop.n64-_click_start.n64);
|
||||
|
||||
}
|
||||
unsigned long long get_click(void) { return (_click_stop.n64 - _click_start.n64); }
|
||||
|
||||
inline void find_frequency(void) {
|
||||
|
||||
time_t initial, final;
|
||||
int dummy = 2;
|
||||
|
||||
@ -90,107 +62,76 @@ public :
|
||||
start();
|
||||
do {
|
||||
dummy += 2;
|
||||
}
|
||||
while(time(0)==initial);
|
||||
} while (time(0) == initial);
|
||||
// On est au debut d'un cycle d'une seconde !!!
|
||||
initial = time(0);
|
||||
start();
|
||||
do {
|
||||
dummy += 2;
|
||||
}
|
||||
while(time(0)==initial);
|
||||
} while (time(0) == initial);
|
||||
final = time(0);
|
||||
stop();
|
||||
// INFOS("fine grained time : "<< get_elapsed_time_in_second());
|
||||
// INFOS("coarse grained time : "<< final-initial);
|
||||
_frequency = _frequency * get_elapsed_time_in_second() / double(final - initial);
|
||||
/// INFOS("CPU frequency : "<< _frequency);
|
||||
|
||||
}
|
||||
|
||||
void add_get_click(void) {
|
||||
|
||||
_nb_sample++;
|
||||
_counted_clicks[get_click()]++;
|
||||
fill_history_clicks();
|
||||
|
||||
}
|
||||
|
||||
void dump_statistics(string filemane) {
|
||||
|
||||
ofstream outfile(filemane.c_str(), ios::out);
|
||||
|
||||
std::map<unsigned long long, unsigned long long>::iterator itr;
|
||||
for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end() ; itr++)
|
||||
{
|
||||
for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
|
||||
outfile << (*itr).first << " " << (*itr).second << endl;
|
||||
}
|
||||
|
||||
outfile.close();
|
||||
|
||||
}
|
||||
|
||||
void dump_history(string filemane) {
|
||||
|
||||
ofstream outfile(filemane.c_str(), ios::out);
|
||||
|
||||
|
||||
|
||||
for(int i=0 ; i<_history_mean_clicks.size() ; i++)
|
||||
{
|
||||
outfile << i << " "
|
||||
<< _history_mean_clicks[i] << " "
|
||||
<< _history_shortest_clicks[i] << " "
|
||||
for (int i = 0; i < _history_mean_clicks.size(); i++) {
|
||||
outfile << i << " " << _history_mean_clicks[i] << " " << _history_shortest_clicks[i] << " "
|
||||
<< _history_most_occured_clicks[i] << endl;
|
||||
}
|
||||
|
||||
outfile.close();
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
double get_mean_clicks(void) {
|
||||
|
||||
std::map<unsigned long long, unsigned long long>::iterator itr;
|
||||
|
||||
unsigned long long mean_clicks = 0;
|
||||
|
||||
for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end() ; itr++)
|
||||
{
|
||||
|
||||
for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
|
||||
mean_clicks += (*itr).second * (*itr).first;
|
||||
}
|
||||
|
||||
return mean_clicks / double(_nb_sample);
|
||||
|
||||
}
|
||||
|
||||
double get_shortest_clicks( void ){
|
||||
|
||||
return double((*_counted_clicks.begin()).first);
|
||||
|
||||
}
|
||||
double get_shortest_clicks(void) { return double((*_counted_clicks.begin()).first); }
|
||||
|
||||
void fill_history_clicks(void) {
|
||||
|
||||
_history_mean_clicks.push_back(get_mean_clicks());
|
||||
_history_shortest_clicks.push_back(get_shortest_clicks());
|
||||
_history_most_occured_clicks.push_back(get_most_occured_clicks());
|
||||
|
||||
}
|
||||
|
||||
|
||||
double get_most_occured_clicks(void) {
|
||||
|
||||
unsigned long long moc = 0;
|
||||
unsigned long long max_occurence = 0;
|
||||
|
||||
std::map<unsigned long long, unsigned long long>::iterator itr;
|
||||
|
||||
for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end() ; itr++)
|
||||
{
|
||||
|
||||
for (itr = _counted_clicks.begin(); itr != _counted_clicks.end(); itr++) {
|
||||
if (max_occurence <= (*itr).second) {
|
||||
max_occurence = (*itr).second;
|
||||
moc = (*itr).first;
|
||||
@ -198,11 +139,9 @@ public :
|
||||
}
|
||||
|
||||
return double(moc);
|
||||
|
||||
}
|
||||
|
||||
void clear( void )
|
||||
{
|
||||
void clear(void) {
|
||||
_counted_clicks.clear();
|
||||
|
||||
_history_mean_clicks.clear();
|
||||
@ -212,18 +151,13 @@ public :
|
||||
_nb_sample = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private:
|
||||
|
||||
union
|
||||
{
|
||||
union {
|
||||
unsigned long int n32[2];
|
||||
unsigned long long n64;
|
||||
} _click_start;
|
||||
|
||||
union
|
||||
{
|
||||
union {
|
||||
unsigned long int n32[2];
|
||||
unsigned long long n64;
|
||||
} _click_stop;
|
||||
@ -237,10 +171,6 @@ private :
|
||||
vector<double> _history_most_occured_clicks;
|
||||
|
||||
unsigned long long _nb_sample;
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -23,36 +23,26 @@
|
||||
#include "size_log.hh"
|
||||
|
||||
template <class Vector>
|
||||
void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max, Vector & X)
|
||||
{
|
||||
void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max, Vector& X) {
|
||||
int ten = 10;
|
||||
int nine = 9;
|
||||
|
||||
X.resize(nb_point);
|
||||
|
||||
if (nb_point > ten) {
|
||||
|
||||
for (int i = 0; i < nine; i++) {
|
||||
|
||||
X[i] = i + 1;
|
||||
|
||||
}
|
||||
|
||||
Vector log_size;
|
||||
size_log(nb_point - nine, ten, size_max, log_size);
|
||||
|
||||
for (int i = 0; i < nb_point - nine; i++) {
|
||||
|
||||
X[i + nine] = log_size[i];
|
||||
|
||||
}
|
||||
}
|
||||
else{
|
||||
|
||||
} else {
|
||||
for (int i = 0; i < nb_point; i++) {
|
||||
|
||||
X[i] = i + 1;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -61,10 +51,6 @@ void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max
|
||||
// INFOS("computed sizes : X["<<i<<"]="<<X[i]);
|
||||
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -26,8 +26,7 @@
|
||||
// [] operator for setting element
|
||||
// the vector element are int compatible.
|
||||
template <class Vector>
|
||||
void size_log(const int nb_point, const int size_min, const int size_max, Vector & X)
|
||||
{
|
||||
void size_log(const int nb_point, const int size_min, const int size_max, Vector& X) {
|
||||
X.resize(nb_point);
|
||||
|
||||
float ls_min = log(float(size_min));
|
||||
@ -40,15 +39,12 @@ void size_log(const int nb_point, const int size_min, const int size_max, Vector
|
||||
int size = 0;
|
||||
|
||||
for (int i = 0; i < nb_point; i++) {
|
||||
|
||||
ls = ls_min + float(i) * delta_ls;
|
||||
|
||||
size = int(exp(ls));
|
||||
|
||||
X[i] = size;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -18,9 +18,19 @@
|
||||
|
||||
/* --- INFOS is always defined (without _DEBUG_): to be used for warnings, with release version --- */
|
||||
|
||||
# define HEREWEARE cout<<flush ; cerr << __FILE__ << " [" << __LINE__ << "] : " << flush ;
|
||||
# define INFOS(chain) {HEREWEARE ; cerr << chain << endl ;}
|
||||
# define PYSCRIPT(chain) {cout<<flush ; cerr << "---PYSCRIPT--- " << chain << endl ;}
|
||||
#define HEREWEARE \
|
||||
cout << flush; \
|
||||
cerr << __FILE__ << " [" << __LINE__ << "] : " << flush;
|
||||
#define INFOS(chain) \
|
||||
{ \
|
||||
HEREWEARE; \
|
||||
cerr << chain << endl; \
|
||||
}
|
||||
#define PYSCRIPT(chain) \
|
||||
{ \
|
||||
cout << flush; \
|
||||
cerr << "---PYSCRIPT--- " << chain << endl; \
|
||||
}
|
||||
|
||||
/* --- To print date and time of compilation of current source on stdout --- */
|
||||
|
||||
@ -39,7 +49,8 @@
|
||||
#ifdef INFOS_COMPILATION
|
||||
#error INFOS_COMPILATION already defined
|
||||
#endif
|
||||
# define INFOS_COMPILATION {\
|
||||
#define INFOS_COMPILATION \
|
||||
{ \
|
||||
cerr << flush; \
|
||||
cout << __FILE__; \
|
||||
cout << " [" << __LINE__ << "] : "; \
|
||||
@ -54,20 +65,48 @@
|
||||
|
||||
/* --- the following MACROS are useful at debug time --- */
|
||||
|
||||
# define HERE cout<<flush ; cerr << "- Trace " << __FILE__ << " [" << __LINE__ << "] : " << flush ;
|
||||
# define SCRUTE(var) HERE ; cerr << #var << "=" << var << endl ;
|
||||
# define MESSAGE(chain) {HERE ; cerr << chain << endl ;}
|
||||
# define INTERRUPTION(code) HERE ; cerr << "INTERRUPTION return code= " << code << endl ; exit(code) ;
|
||||
#define HERE \
|
||||
cout << flush; \
|
||||
cerr << "- Trace " << __FILE__ << " [" << __LINE__ << "] : " << flush;
|
||||
#define SCRUTE(var) \
|
||||
HERE; \
|
||||
cerr << #var << "=" << var << endl;
|
||||
#define MESSAGE(chain) \
|
||||
{ \
|
||||
HERE; \
|
||||
cerr << chain << endl; \
|
||||
}
|
||||
#define INTERRUPTION(code) \
|
||||
HERE; \
|
||||
cerr << "INTERRUPTION return code= " << code << endl; \
|
||||
exit(code);
|
||||
|
||||
#ifndef ASSERT
|
||||
# define ASSERT(condition) if (!(condition)){ HERE ; cerr << "CONDITION " << #condition << " NOT VERIFIED"<< endl ; INTERRUPTION(1) ;}
|
||||
#define ASSERT(condition) \
|
||||
if (!(condition)) { \
|
||||
HERE; \
|
||||
cerr << "CONDITION " << #condition << " NOT VERIFIED" << endl; \
|
||||
INTERRUPTION(1); \
|
||||
}
|
||||
#endif /* ASSERT */
|
||||
|
||||
#define REPERE cout<<flush ; cerr << " --------------" << endl << flush ;
|
||||
#define BEGIN_OF(chain) {REPERE ; HERE ; cerr << "Begin of: " << chain << endl ; REPERE ; }
|
||||
#define END_OF(chain) {REPERE ; HERE ; cerr << "Normal end of: " << chain << endl ; REPERE ; }
|
||||
|
||||
|
||||
#define REPERE \
|
||||
cout << flush; \
|
||||
cerr << " --------------" << endl << flush;
|
||||
#define BEGIN_OF(chain) \
|
||||
{ \
|
||||
REPERE; \
|
||||
HERE; \
|
||||
cerr << "Begin of: " << chain << endl; \
|
||||
REPERE; \
|
||||
}
|
||||
#define END_OF(chain) \
|
||||
{ \
|
||||
REPERE; \
|
||||
HERE; \
|
||||
cerr << "Normal end of: " << chain << endl; \
|
||||
REPERE; \
|
||||
}
|
||||
|
||||
#else /* ifdef _DEBUG_*/
|
||||
|
||||
@ -84,7 +123,6 @@
|
||||
#define BEGIN_OF(chain)
|
||||
#define END_OF(chain)
|
||||
|
||||
|
||||
#endif /* ifdef _DEBUG_*/
|
||||
|
||||
#endif /* ifndef UTILITIES_H */
|
||||
|
@ -25,10 +25,8 @@
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
bool read_xy_file(const std::string & filename, std::vector<int> & tab_sizes,
|
||||
std::vector<double> & tab_mflops, bool quiet = false)
|
||||
{
|
||||
|
||||
bool read_xy_file(const std::string& filename, std::vector<int>& tab_sizes, std::vector<double>& tab_mflops,
|
||||
bool quiet = false) {
|
||||
std::ifstream input_file(filename.c_str(), std::ios::in);
|
||||
|
||||
if (!input_file) {
|
||||
@ -62,12 +60,10 @@ using namespace std;
|
||||
|
||||
template <class Vector_A, class Vector_B>
|
||||
void dump_xy_file(const Vector_A& X, const Vector_B& Y, const std::string& filename) {
|
||||
|
||||
ofstream outfile(filename.c_str(), ios::out);
|
||||
int size = X.size();
|
||||
|
||||
for (int i=0;i<size;i++)
|
||||
outfile << X[i] << " " << Y[i] << endl;
|
||||
for (int i = 0; i < size; i++) outfile << X[i] << " " << Y[i] << endl;
|
||||
|
||||
outfile.close();
|
||||
}
|
||||
|
@ -27,9 +27,7 @@ void BLASFUNC(zdotu) (double *, int *, double *, int *, double *, int *);
|
||||
void BLASFUNC(zdotc)(double *, int *, double *, int *, double *, int *);
|
||||
void BLASFUNC(xdotu)(double *, int *, double *, int *, double *, int *);
|
||||
void BLASFUNC(xdotc)(double *, int *, double *, int *, double *, int *);
|
||||
#elif defined(F_INTERFACE_F2C) || \
|
||||
defined(F_INTERFACE_PGI) || \
|
||||
defined(F_INTERFACE_GFORT) || \
|
||||
#elif defined(F_INTERFACE_F2C) || defined(F_INTERFACE_PGI) || defined(F_INTERFACE_GFORT) || \
|
||||
(defined(F_INTERFACE_PATHSCALE) && defined(__64BIT__))
|
||||
void BLASFUNC(cdotu)(float *, int *, float *, int *, float *, int *);
|
||||
void BLASFUNC(cdotc)(float *, int *, float *, int *, float *, int *);
|
||||
@ -179,50 +177,29 @@ int BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *);
|
||||
|
||||
/* Level 2 routines */
|
||||
|
||||
int BLASFUNC(sger)(int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, int *);
|
||||
int BLASFUNC(dger)(int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, int *);
|
||||
int BLASFUNC(qger)(int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, int *);
|
||||
int BLASFUNC(cgeru)(int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, int *);
|
||||
int BLASFUNC(cgerc)(int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, int *);
|
||||
int BLASFUNC(zgeru)(int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, int *);
|
||||
int BLASFUNC(zgerc)(int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, int *);
|
||||
int BLASFUNC(xgeru)(int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, int *);
|
||||
int BLASFUNC(xgerc)(int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, int *);
|
||||
int BLASFUNC(sger)(int *, int *, float *, float *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dger)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(qger)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(cgeru)(int *, int *, float *, float *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(cgerc)(int *, int *, float *, float *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zgeru)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(zgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xgeru)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xgerc)(int *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(strsv) (char *, char *, char *, int *, float *, int *,
|
||||
float *, int *);
|
||||
int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(ctrsv) (char *, char *, char *, int *, float *, int *,
|
||||
float *, int *);
|
||||
int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(strsv)(char *, char *, char *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dtrsv)(char *, char *, char *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(qtrsv)(char *, char *, char *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(ctrsv)(char *, char *, char *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(ztrsv)(char *, char *, char *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xtrsv)(char *, char *, char *, int *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(stpsv)(char *, char *, char *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dtpsv)(char *, char *, char *, int *, double *, double *, int *);
|
||||
@ -231,18 +208,12 @@ int BLASFUNC(ctpsv) (char *, char *, char *, int *, float *, float *, int *);
|
||||
int BLASFUNC(ztpsv)(char *, char *, char *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xtpsv)(char *, char *, char *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(strmv) (char *, char *, char *, int *, float *, int *,
|
||||
float *, int *);
|
||||
int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(ctrmv) (char *, char *, char *, int *, float *, int *,
|
||||
float *, int *);
|
||||
int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(strmv)(char *, char *, char *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dtrmv)(char *, char *, char *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(qtrmv)(char *, char *, char *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(ctrmv)(char *, char *, char *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(ztrmv)(char *, char *, char *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xtrmv)(char *, char *, char *, int *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(stpmv)(char *, char *, char *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dtpmv)(char *, char *, char *, int *, double *, double *, int *);
|
||||
@ -265,328 +236,227 @@ int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float *, int *, floa
|
||||
int BLASFUNC(ztbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xtbsv)(char *, char *, char *, int *, int *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(ssymv) (char *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dsymv) (char *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qsymv) (char *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(csymv) (char *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsymv) (char *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xsymv) (char *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(ssymv)(char *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dsymv)(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qsymv)(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(csymv)(char *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsymv)(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xsymv)(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(sspmv) (char *, int *, float *, float *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dspmv) (char *, int *, double *, double *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qspmv) (char *, int *, double *, double *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(cspmv) (char *, int *, float *, float *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zspmv) (char *, int *, double *, double *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xspmv) (char *, int *, double *, double *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(sspmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(cspmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xspmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(ssyr) (char *, int *, float *, float *, int *,
|
||||
float *, int *);
|
||||
int BLASFUNC(dsyr) (char *, int *, double *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(qsyr) (char *, int *, double *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(csyr) (char *, int *, float *, float *, int *,
|
||||
float *, int *);
|
||||
int BLASFUNC(zsyr) (char *, int *, double *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(xsyr) (char *, int *, double *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(ssyr)(char *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dsyr)(char *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(qsyr)(char *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(csyr)(char *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zsyr)(char *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xsyr)(char *, int *, double *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(ssyr2) (char *, int *, float *,
|
||||
float *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dsyr2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(qsyr2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(csyr2) (char *, int *, float *,
|
||||
float *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zsyr2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xsyr2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(ssyr2)(char *, int *, float *, float *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dsyr2)(char *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(qsyr2)(char *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(csyr2)(char *, int *, float *, float *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zsyr2)(char *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xsyr2)(char *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(sspr) (char *, int *, float *, float *, int *,
|
||||
float *);
|
||||
int BLASFUNC(dspr) (char *, int *, double *, double *, int *,
|
||||
double *);
|
||||
int BLASFUNC(qspr) (char *, int *, double *, double *, int *,
|
||||
double *);
|
||||
int BLASFUNC(cspr) (char *, int *, float *, float *, int *,
|
||||
float *);
|
||||
int BLASFUNC(zspr) (char *, int *, double *, double *, int *,
|
||||
double *);
|
||||
int BLASFUNC(xspr) (char *, int *, double *, double *, int *,
|
||||
double *);
|
||||
int BLASFUNC(sspr)(char *, int *, float *, float *, int *, float *);
|
||||
int BLASFUNC(dspr)(char *, int *, double *, double *, int *, double *);
|
||||
int BLASFUNC(qspr)(char *, int *, double *, double *, int *, double *);
|
||||
int BLASFUNC(cspr)(char *, int *, float *, float *, int *, float *);
|
||||
int BLASFUNC(zspr)(char *, int *, double *, double *, int *, double *);
|
||||
int BLASFUNC(xspr)(char *, int *, double *, double *, int *, double *);
|
||||
|
||||
int BLASFUNC(sspr2) (char *, int *, float *,
|
||||
float *, int *, float *, int *, float *);
|
||||
int BLASFUNC(dspr2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *);
|
||||
int BLASFUNC(qspr2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *);
|
||||
int BLASFUNC(cspr2) (char *, int *, float *,
|
||||
float *, int *, float *, int *, float *);
|
||||
int BLASFUNC(zspr2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *);
|
||||
int BLASFUNC(xspr2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *);
|
||||
int BLASFUNC(sspr2)(char *, int *, float *, float *, int *, float *, int *, float *);
|
||||
int BLASFUNC(dspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
|
||||
int BLASFUNC(qspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
|
||||
int BLASFUNC(cspr2)(char *, int *, float *, float *, int *, float *, int *, float *);
|
||||
int BLASFUNC(zspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
|
||||
int BLASFUNC(xspr2)(char *, int *, double *, double *, int *, double *, int *, double *);
|
||||
|
||||
int BLASFUNC(cher) (char *, int *, float *, float *, int *,
|
||||
float *, int *);
|
||||
int BLASFUNC(zher) (char *, int *, double *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(xher) (char *, int *, double *, double *, int *,
|
||||
double *, int *);
|
||||
int BLASFUNC(cher)(char *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zher)(char *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xher)(char *, int *, double *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(chpr)(char *, int *, float *, float *, int *, float *);
|
||||
int BLASFUNC(zhpr)(char *, int *, double *, double *, int *, double *);
|
||||
int BLASFUNC(xhpr)(char *, int *, double *, double *, int *, double *);
|
||||
|
||||
int BLASFUNC(cher2) (char *, int *, float *,
|
||||
float *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zher2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xher2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(cher2)(char *, int *, float *, float *, int *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zher2)(char *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xher2)(char *, int *, double *, double *, int *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(chpr2) (char *, int *, float *,
|
||||
float *, int *, float *, int *, float *);
|
||||
int BLASFUNC(zhpr2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *);
|
||||
int BLASFUNC(xhpr2) (char *, int *, double *,
|
||||
double *, int *, double *, int *, double *);
|
||||
int BLASFUNC(chpr2)(char *, int *, float *, float *, int *, float *, int *, float *);
|
||||
int BLASFUNC(zhpr2)(char *, int *, double *, double *, int *, double *, int *, double *);
|
||||
int BLASFUNC(xhpr2)(char *, int *, double *, double *, int *, double *, int *, double *);
|
||||
|
||||
int BLASFUNC(chemv) (char *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zhemv) (char *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xhemv) (char *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(chemv)(char *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zhemv)(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xhemv)(char *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(chpmv) (char *, int *, float *, float *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zhpmv) (char *, int *, double *, double *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xhpmv) (char *, int *, double *, double *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(chpmv)(char *, int *, float *, float *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zhpmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xhpmv)(char *, int *, double *, double *, double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(snorm)(char *, int *, int *, float *, int *);
|
||||
int BLASFUNC(dnorm)(char *, int *, int *, double *, int *);
|
||||
int BLASFUNC(cnorm)(char *, int *, int *, float *, int *);
|
||||
int BLASFUNC(znorm)(char *, int *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
|
||||
int *);
|
||||
int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
|
||||
int *);
|
||||
int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
|
||||
int BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
|
||||
|
||||
/* Level 3 routines */
|
||||
|
||||
int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *,
|
||||
float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *,
|
||||
double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *,
|
||||
double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *,
|
||||
float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *,
|
||||
double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *,
|
||||
double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
|
||||
int *);
|
||||
int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
|
||||
int *);
|
||||
int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
|
||||
int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *,
|
||||
float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *,
|
||||
double *, int *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *,
|
||||
double *, int *, double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *,
|
||||
float *, float *, int *, float *, int *,
|
||||
float *, float *, int *);
|
||||
int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *,
|
||||
double *, double *, int *, double *, int *,
|
||||
double *, double *, int *);
|
||||
int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *,
|
||||
float *, float *, int *, float *, int *,
|
||||
float *, float *, int *);
|
||||
int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *,
|
||||
double *, double *, int *, double *, int *,
|
||||
double *, double *, int *);
|
||||
|
||||
int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *,
|
||||
float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *,
|
||||
double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *,
|
||||
double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *,
|
||||
float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *,
|
||||
double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *,
|
||||
double *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *,
|
||||
float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *,
|
||||
double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *,
|
||||
double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *,
|
||||
float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *,
|
||||
double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *,
|
||||
double *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, float *, int *);
|
||||
int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, double *, int *);
|
||||
int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, double *, int *);
|
||||
int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, float *, int *);
|
||||
int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, double *, int *);
|
||||
int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, double *, int *);
|
||||
|
||||
int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double*, int *, double *, double *, int *);
|
||||
int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double*, int *, double *, double *, int *);
|
||||
int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double*, int *, double *, double *, int *);
|
||||
int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double*, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, float *, int *);
|
||||
int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, double *, int *);
|
||||
int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double *, double *, int *);
|
||||
|
||||
int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double*, int *, double *, double *, int *);
|
||||
int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *,
|
||||
double*, int *, double *, double *, int *);
|
||||
int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *,
|
||||
float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
|
||||
double*, int *, double *, double *, int *);
|
||||
int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
|
||||
double*, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *,
|
||||
float *, int *);
|
||||
int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *,
|
||||
int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
|
||||
int *);
|
||||
int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *,
|
||||
double *, int *);
|
||||
int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *,
|
||||
float *, int *);
|
||||
int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *,
|
||||
int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, double *,
|
||||
double *, int *);
|
||||
|
||||
int BLASFUNC(sgema)(char *, char *, int *, int *, float *,
|
||||
float *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dgema)(char *, char *, int *, int *, double *,
|
||||
double *, int *, double*, double *, int *, double*, int *);
|
||||
int BLASFUNC(cgema)(char *, char *, int *, int *, float *,
|
||||
float *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zgema)(char *, char *, int *, int *, double *,
|
||||
double *, int *, double*, double *, int *, double*, int *);
|
||||
int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
|
||||
int *);
|
||||
int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *,
|
||||
double *, int *);
|
||||
int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
|
||||
int *);
|
||||
int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *,
|
||||
double *, int *);
|
||||
|
||||
int BLASFUNC(sgems)(char *, char *, int *, int *, float *,
|
||||
float *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dgems)(char *, char *, int *, int *, double *,
|
||||
double *, int *, double*, double *, int *, double*, int *);
|
||||
int BLASFUNC(cgems)(char *, char *, int *, int *, float *,
|
||||
float *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zgems)(char *, char *, int *, int *, double *,
|
||||
double *, int *, double*, double *, int *, double*, int *);
|
||||
int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *, double *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *, double *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
|
||||
int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
|
||||
int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
|
||||
int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
|
||||
int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
|
||||
int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *);
|
||||
int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *);
|
||||
|
||||
int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *);
|
||||
int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
|
||||
int *);
|
||||
int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *, float *, int *, float *, float *,
|
||||
int *);
|
||||
int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *,
|
||||
double *, int *);
|
||||
int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *, double *, int *, double *,
|
||||
double *, int *);
|
||||
|
||||
int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *, double *, int *);
|
||||
int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *, double *, int *);
|
||||
|
||||
int BLASFUNC(sgema)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dgema)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
|
||||
int *);
|
||||
int BLASFUNC(cgema)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zgema)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
|
||||
int *);
|
||||
|
||||
int BLASFUNC(sgems)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(dgems)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
|
||||
int *);
|
||||
int BLASFUNC(cgems)(char *, char *, int *, int *, float *, float *, int *, float *, float *, int *, float *, int *);
|
||||
int BLASFUNC(zgems)(char *, char *, int *, int *, double *, double *, int *, double *, double *, int *, double *,
|
||||
int *);
|
||||
|
||||
int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *);
|
||||
int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
|
||||
|
@ -22,17 +22,20 @@
|
||||
|
||||
#include <c_interface_base.h>
|
||||
#include <complex>
|
||||
extern "C"
|
||||
{
|
||||
extern "C" {
|
||||
#include "blas.h"
|
||||
|
||||
// Cholesky Factorization
|
||||
// void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info);
|
||||
// void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
|
||||
void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work, int *lwork, int *info );
|
||||
void dsytrd_(char *uplo, const int *n, double *a, const int *lda, double *d, double *e, double *tau, double *work, int *lwork, int *info );
|
||||
void sgehrd_( const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork, int *info );
|
||||
void dgehrd_( const int *n, int *ilo, int *ihi, double *a, const int *lda, double *tau, double *work, int *lwork, int *info );
|
||||
void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work,
|
||||
int *lwork, int *info);
|
||||
void dsytrd_(char *uplo, const int *n, double *a, const int *lda, double *d, double *e, double *tau, double *work,
|
||||
int *lwork, int *info);
|
||||
void sgehrd_(const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork,
|
||||
int *info);
|
||||
void dgehrd_(const int *n, int *ilo, int *ihi, double *a, const int *lda, double *tau, double *work, int *lwork,
|
||||
int *info);
|
||||
|
||||
// LU row pivoting
|
||||
// void dgetrf_( int *m, int *n, double *a, int *lda, int *ipiv, int *info );
|
||||
@ -50,9 +53,8 @@ extern "C"
|
||||
#define CAT2(A, B) A##B
|
||||
#define CAT(A, B) CAT2(A, B)
|
||||
|
||||
|
||||
template<class real> class blas_interface;
|
||||
|
||||
template <class real>
|
||||
class blas_interface;
|
||||
|
||||
static char notrans = 'N';
|
||||
static char trans = 'T';
|
||||
@ -62,15 +64,12 @@ static char right = 'R';
|
||||
static char left = 'L';
|
||||
static int intone = 1;
|
||||
|
||||
|
||||
|
||||
#define SCALAR float
|
||||
#define SCALAR_PREFIX s
|
||||
#include "blas_interface_impl.hh"
|
||||
#undef SCALAR
|
||||
#undef SCALAR_PREFIX
|
||||
|
||||
|
||||
#define SCALAR double
|
||||
#define SCALAR_PREFIX d
|
||||
#include "blas_interface_impl.hh"
|
||||
@ -78,6 +77,3 @@ static int intone = 1;
|
||||
#undef SCALAR_PREFIX
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -1,18 +1,13 @@
|
||||
|
||||
#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX, NAME), _)
|
||||
|
||||
template<> class blas_interface<SCALAR> : public c_interface_base<SCALAR>
|
||||
{
|
||||
|
||||
template <>
|
||||
class blas_interface<SCALAR> : public c_interface_base<SCALAR> {
|
||||
public:
|
||||
|
||||
static SCALAR fone;
|
||||
static SCALAR fzero;
|
||||
|
||||
static inline std::string name()
|
||||
{
|
||||
return MAKE_STRING(CBLASNAME);
|
||||
}
|
||||
static inline std::string name() { return MAKE_STRING(CBLASNAME); }
|
||||
|
||||
static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
|
||||
BLAS_FUNC(gemv)(¬rans, &N, &N, &fone, A, &N, B, &intone, &fzero, X, &intone);
|
||||
@ -106,8 +101,6 @@ public :
|
||||
BLAS_FUNC(getc2)(&N, C, &N, ipiv, jpiv, &info);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static inline void hessenberg(const gene_matrix& X, gene_matrix& C, int N) {
|
||||
{
|
||||
int N2 = N * N;
|
||||
@ -140,7 +133,6 @@ public :
|
||||
}
|
||||
|
||||
#endif // HAS_LAPACK
|
||||
|
||||
};
|
||||
|
||||
SCALAR blas_interface<SCALAR>::fone = SCALAR(1);
|
||||
|
@ -5,11 +5,9 @@
|
||||
#include "utilities.h"
|
||||
#include <vector>
|
||||
|
||||
template<class real> class c_interface_base
|
||||
{
|
||||
|
||||
template <class real>
|
||||
class c_interface_base {
|
||||
public:
|
||||
|
||||
typedef real real_type;
|
||||
typedef std::vector<real> stl_vector;
|
||||
typedef std::vector<stl_vector> stl_matrix;
|
||||
@ -17,47 +15,38 @@ public:
|
||||
typedef real* gene_matrix;
|
||||
typedef real* gene_vector;
|
||||
|
||||
static void free_matrix(gene_matrix & A, int /*N*/){
|
||||
delete[] A;
|
||||
}
|
||||
static void free_matrix(gene_matrix& A, int /*N*/) { delete[] A; }
|
||||
|
||||
static void free_vector(gene_vector & B){
|
||||
delete[] B;
|
||||
}
|
||||
static void free_vector(gene_vector& B) { delete[] B; }
|
||||
|
||||
static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
|
||||
int N = A_stl.size();
|
||||
A = new real[N * N];
|
||||
for (int j = 0; j < N; j++)
|
||||
for (int i=0;i<N;i++)
|
||||
A[i+N*j] = A_stl[j][i];
|
||||
for (int i = 0; i < N; i++) A[i + N * j] = A_stl[j][i];
|
||||
}
|
||||
|
||||
static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) {
|
||||
int N = B_stl.size();
|
||||
B = new real[N];
|
||||
for (int i=0;i<N;i++)
|
||||
B[i] = B_stl[i];
|
||||
for (int i = 0; i < N; i++) B[i] = B_stl[i];
|
||||
}
|
||||
|
||||
static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) {
|
||||
int N = B_stl.size();
|
||||
for (int i=0;i<N;i++)
|
||||
B_stl[i] = B[i];
|
||||
for (int i = 0; i < N; i++) B_stl[i] = B[i];
|
||||
}
|
||||
|
||||
static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
|
||||
int N = A_stl.size();
|
||||
for (int j = 0; j < N; j++) {
|
||||
A_stl[j].resize(N);
|
||||
for (int i=0;i<N;i++)
|
||||
A_stl[j][i] = A[i+N*j];
|
||||
for (int i = 0; i < N; i++) A_stl[j][i] = A[i + N * j];
|
||||
}
|
||||
}
|
||||
|
||||
static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) {
|
||||
for (int i=0;i<N;i++)
|
||||
cible[i]=source[i];
|
||||
for (int i = 0; i < N; i++) cible[i] = source[i];
|
||||
}
|
||||
|
||||
static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) {
|
||||
@ -67,7 +56,6 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -33,9 +33,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
int main() {
|
||||
bench<Action_axpy<blas_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
bench<Action_axpby<blas_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
|
||||
@ -69,5 +67,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -27,9 +27,7 @@ using namespace std;
|
||||
|
||||
template <class real>
|
||||
class STL_interface {
|
||||
|
||||
public:
|
||||
|
||||
typedef real real_type;
|
||||
|
||||
typedef std::vector<real> stl_vector;
|
||||
@ -39,31 +37,19 @@ public :
|
||||
|
||||
typedef stl_vector gene_vector;
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "STL";
|
||||
}
|
||||
static inline std::string name(void) { return "STL"; }
|
||||
|
||||
static void free_matrix(gene_matrix& /*A*/, int /*N*/) {}
|
||||
|
||||
static void free_vector(gene_vector& /*B*/) {}
|
||||
|
||||
static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
|
||||
A = A_stl;
|
||||
}
|
||||
static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) { A = A_stl; }
|
||||
|
||||
static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
|
||||
B = B_stl;
|
||||
}
|
||||
static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) { B = B_stl; }
|
||||
|
||||
static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
|
||||
B_stl = B ;
|
||||
}
|
||||
static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) { B_stl = B; }
|
||||
|
||||
|
||||
static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
|
||||
A_stl = A ;
|
||||
}
|
||||
static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) { A_stl = A; }
|
||||
|
||||
static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
@ -71,37 +57,30 @@ public :
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) {
|
||||
for (int i = 0; i < N; i++)
|
||||
for (int j=0;j<N;j++)
|
||||
cible[i][j]=source[i][j];
|
||||
for (int j = 0; j < N; j++) cible[i][j] = source[i][j];
|
||||
}
|
||||
|
||||
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
static inline void ata_product(const gene_matrix& A, gene_matrix& X, int N) {
|
||||
real somme;
|
||||
for (int j = 0; j < N; j++) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
somme = 0.0;
|
||||
if(i>=j)
|
||||
{
|
||||
for (int k=0;k<N;k++)
|
||||
somme += A[i][k]*A[j][k];
|
||||
if (i >= j) {
|
||||
for (int k = 0; k < N; k++) somme += A[i][k] * A[j][k];
|
||||
X[j][i] = somme;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
static inline void aat_product(const gene_matrix& A, gene_matrix& X, int N) {
|
||||
real somme;
|
||||
for (int j = 0; j < N; j++) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
somme = 0.0;
|
||||
if(i>=j)
|
||||
{
|
||||
if (i >= j) {
|
||||
for (int k = 0; k < N; k++) {
|
||||
somme += A[k][i] * A[k][j];
|
||||
}
|
||||
@ -111,37 +90,29 @@ public :
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
|
||||
{
|
||||
static inline void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X, int N) {
|
||||
real somme;
|
||||
for (int j = 0; j < N; j++) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
somme = 0.0;
|
||||
for (int k=0;k<N;k++)
|
||||
somme+=A[k][i]*B[j][k];
|
||||
for (int k = 0; k < N; k++) somme += A[k][i] * B[j][k];
|
||||
X[j][i] = somme;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
|
||||
real somme;
|
||||
for (int i = 0; i < N; i++) {
|
||||
somme = 0.0;
|
||||
for (int j=0;j<N;j++)
|
||||
somme+=A[j][i]*B[j];
|
||||
for (int j = 0; j < N; j++) somme += A[j][i] * B[j];
|
||||
X[i] = somme;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
for (int j=0; j<N; ++j)
|
||||
X[j] = 0;
|
||||
for (int j=0; j<N; ++j)
|
||||
{
|
||||
static inline void symv(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
|
||||
for (int j = 0; j < N; ++j) X[j] = 0;
|
||||
for (int j = 0; j < N; ++j) {
|
||||
real t1 = B[j];
|
||||
real t2 = 0;
|
||||
X[j] += t1 * A[j][j];
|
||||
@ -153,58 +124,45 @@ public :
|
||||
}
|
||||
}
|
||||
|
||||
static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
for (int j=0; j<N; ++j)
|
||||
{
|
||||
for (int i=j; i<N; ++i)
|
||||
A[j][i] += B[i]*X[j] + B[j]*X[i];
|
||||
static inline void syr2(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
|
||||
for (int j = 0; j < N; ++j) {
|
||||
for (int i = j; i < N; ++i) A[j][i] += B[i] * X[j] + B[j] * X[i];
|
||||
}
|
||||
}
|
||||
|
||||
static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N)
|
||||
{
|
||||
for (int j=0; j<N; ++j)
|
||||
{
|
||||
for (int i=j; i<N; ++i)
|
||||
A[j][i] += X[i]*Y[j];
|
||||
static inline void ger(gene_matrix& A, gene_vector& X, gene_vector& Y, int N) {
|
||||
for (int j = 0; j < N; ++j) {
|
||||
for (int i = j; i < N; ++i) A[j][i] += X[i] * Y[j];
|
||||
}
|
||||
}
|
||||
|
||||
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
static inline void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
|
||||
real somme;
|
||||
for (int i = 0; i < N; i++) {
|
||||
somme = 0.0;
|
||||
for (int j=0;j<N;j++)
|
||||
somme += A[i][j]*B[j];
|
||||
for (int j = 0; j < N; j++) somme += A[i][j] * B[j];
|
||||
X[i] = somme;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void axpy(real coef, const gene_vector& X, gene_vector& Y, int N) {
|
||||
for (int i=0;i<N;i++)
|
||||
Y[i]+=coef*X[i];
|
||||
for (int i = 0; i < N; i++) Y[i] += coef * X[i];
|
||||
}
|
||||
|
||||
static inline void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int N) {
|
||||
for (int i=0;i<N;i++)
|
||||
Y[i] = a*X[i] + b*Y[i];
|
||||
for (int i = 0; i < N; i++) Y[i] = a * X[i] + b * Y[i];
|
||||
}
|
||||
|
||||
static inline void trisolve_lower(const gene_matrix& L, const gene_vector& B, gene_vector& X, int N) {
|
||||
copy_vector(B, X, N);
|
||||
for(int i=0; i<N; ++i)
|
||||
{
|
||||
for (int i = 0; i < N; ++i) {
|
||||
X[i] /= L[i][i];
|
||||
real tmp = X[i];
|
||||
for (int j=i+1; j<N; ++j)
|
||||
X[j] -= tmp * L[i][j];
|
||||
for (int j = i + 1; j < N; ++j) X[j] -= tmp * L[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
static inline real norm_diff(const stl_vector & A, const stl_vector & B)
|
||||
{
|
||||
static inline real norm_diff(const stl_vector& A, const stl_vector& B) {
|
||||
int N = A.size();
|
||||
real somme = 0.0;
|
||||
real somme2 = 0.0;
|
||||
@ -217,8 +175,7 @@ public :
|
||||
return somme / somme2;
|
||||
}
|
||||
|
||||
static inline real norm_diff(const stl_matrix & A, const stl_matrix & B)
|
||||
{
|
||||
static inline real norm_diff(const stl_matrix& A, const stl_matrix& B) {
|
||||
int N = A[0].size();
|
||||
real somme = 0.0;
|
||||
real somme2 = 0.0;
|
||||
@ -234,14 +191,12 @@ public :
|
||||
return somme / somme2;
|
||||
}
|
||||
|
||||
static inline void display_vector(const stl_vector & A)
|
||||
{
|
||||
static inline void display_vector(const stl_vector& A) {
|
||||
int N = A.size();
|
||||
for (int i = 0; i < N; i++) {
|
||||
INFOS("A[" << i << "]=" << A[i] << endl);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -24,8 +24,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
|
||||
@ -38,5 +37,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -27,9 +27,7 @@
|
||||
|
||||
template <class real>
|
||||
class blaze_interface {
|
||||
|
||||
public:
|
||||
|
||||
typedef real real_type;
|
||||
|
||||
typedef std::vector<real> stl_vector;
|
||||
@ -40,13 +38,9 @@ public :
|
||||
|
||||
static inline std::string name() { return "blaze"; }
|
||||
|
||||
static void free_matrix(gene_matrix & A, int N){
|
||||
return ;
|
||||
}
|
||||
static void free_matrix(gene_matrix& A, int N) { return; }
|
||||
|
||||
static void free_vector(gene_vector & B){
|
||||
return ;
|
||||
}
|
||||
static void free_vector(gene_vector& B) { return; }
|
||||
|
||||
static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
|
||||
A.resize(A_stl[0].size(), A_stl.size());
|
||||
@ -81,21 +75,19 @@ public :
|
||||
}
|
||||
}
|
||||
|
||||
static EIGEN_DONT_INLINE void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||
static EIGEN_DONT_INLINE void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X,
|
||||
int N) {
|
||||
X = (A * B);
|
||||
}
|
||||
|
||||
static EIGEN_DONT_INLINE void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||
static EIGEN_DONT_INLINE void transposed_matrix_matrix_product(const gene_matrix& A, const gene_matrix& B,
|
||||
gene_matrix& X, int N) {
|
||||
X = (trans(A) * trans(B));
|
||||
}
|
||||
|
||||
static EIGEN_DONT_INLINE void ata_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
X = (trans(A)*A);
|
||||
}
|
||||
static EIGEN_DONT_INLINE void ata_product(const gene_matrix& A, gene_matrix& X, int N) { X = (trans(A) * A); }
|
||||
|
||||
static EIGEN_DONT_INLINE void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
X = (A*trans(A));
|
||||
}
|
||||
static EIGEN_DONT_INLINE void aat_product(const gene_matrix& A, gene_matrix& X, int N) { X = (A * trans(A)); }
|
||||
|
||||
static EIGEN_DONT_INLINE void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
|
||||
X = (A * B);
|
||||
@ -105,9 +97,7 @@ public :
|
||||
X = (trans(A) * B);
|
||||
}
|
||||
|
||||
static EIGEN_DONT_INLINE void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||
Y += coef * X;
|
||||
}
|
||||
static EIGEN_DONT_INLINE void axpy(const real coef, const gene_vector& X, gene_vector& Y, int N) { Y += coef * X; }
|
||||
|
||||
static EIGEN_DONT_INLINE void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int N) {
|
||||
Y = a * X + b * Y;
|
||||
@ -128,14 +118,9 @@ public :
|
||||
// X = lower_trisolve(L, B);
|
||||
// }
|
||||
|
||||
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
||||
cible = source;
|
||||
}
|
||||
|
||||
static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
|
||||
cible = source;
|
||||
}
|
||||
static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) { cible = source; }
|
||||
|
||||
static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) { cible = source; }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -22,9 +22,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
int main() {
|
||||
bench<Action_axpy<blaze_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
bench<Action_axpby<blaze_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
|
||||
@ -36,5 +34,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -26,67 +26,40 @@
|
||||
BZ_USING_NAMESPACE(blitz)
|
||||
|
||||
template <class real>
|
||||
class blitz_LU_solve_interface : public blitz_interface<real>
|
||||
{
|
||||
|
||||
class blitz_LU_solve_interface : public blitz_interface<real> {
|
||||
public:
|
||||
|
||||
typedef typename blitz_interface<real>::gene_matrix gene_matrix;
|
||||
typedef typename blitz_interface<real>::gene_vector gene_vector;
|
||||
|
||||
typedef blitz::Array<int, 1> Pivot_Vector;
|
||||
|
||||
inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N)
|
||||
{
|
||||
// Resizes `pivot` to N entries.
inline static void new_Pivot_Vector(Pivot_Vector &pivot, int N) {
  pivot.resize(N);
}
|
||||
|
||||
pivot.resize(N);
|
||||
|
||||
}
|
||||
|
||||
inline static void free_Pivot_Vector(Pivot_Vector & pivot)
|
||||
{
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end)
|
||||
{
|
||||
// Intentionally a no-op: nothing is released here for `pivot`.
inline static void free_Pivot_Vector(Pivot_Vector &pivot) {}
|
||||
|
||||
// Returns the sum of A(row, k) * B(k) for k running from col_start to
// col_end, both bounds included. Returns 0 when the range is empty.
static inline real matrix_vector_product_sliced(const gene_matrix &A, gene_vector B, int row, int col_start,
                                                int col_end) {
  real acc = 0.;
  for (int k = col_start; k <= col_end; ++k) {
    acc += A(row, k) * B(k);
  }
  return acc;
}
|
||||
|
||||
|
||||
|
||||
|
||||
static inline real matrix_matrix_product_sliced(gene_matrix & A, int row, int col_start, int col_end, gene_matrix & B, int row_shift, int col )
|
||||
{
|
||||
|
||||
// Returns the sum of A(row, k) * B(k + row_shift, col) for k running from
// col_start to col_end, both bounds included. Returns 0 for an empty range.
static inline real matrix_matrix_product_sliced(gene_matrix &A, int row, int col_start, int col_end, gene_matrix &B,
                                                int row_shift, int col) {
  real acc = 0.;
  for (int k = col_start; k <= col_end; ++k) {
    acc += A(row, k) * B(k + row_shift, col);
  }
  return acc;
}
|
||||
|
||||
inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
|
||||
{
|
||||
|
||||
inline static void LU_factor(gene_matrix &LU, Pivot_Vector &pivot, int N) {
|
||||
ASSERT(LU.rows() == LU.cols());
|
||||
int index_max = 0;
|
||||
real big = 0.;
|
||||
@ -144,12 +117,9 @@ public :
|
||||
for (int i = j + 1; i < N; i++) LU(i, j) *= dum;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N)
|
||||
{
|
||||
|
||||
inline static void LU_solve(const gene_matrix &LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N) {
|
||||
// Pour conserver le meme header, on travaille sur X, copie du second-membre B
|
||||
X = B.copy();
|
||||
ASSERT(LU.rows() == LU.cols());
|
||||
@ -184,9 +154,7 @@ public :
|
||||
X(i) = theSum / LU(i, i);
|
||||
// B( i ) = theSum/LU( i, i ) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -32,9 +32,7 @@ BZ_USING_NAMESPACE(blitz)
|
||||
|
||||
template <class real>
|
||||
class blitz_interface {
|
||||
|
||||
public:
|
||||
|
||||
typedef real real_type;
|
||||
|
||||
typedef std::vector<real> stl_vector;
|
||||
@ -77,51 +75,44 @@ public :
|
||||
int N = A_stl.size();
|
||||
for (int j = 0; j < N; j++) {
|
||||
A_stl[j].resize(N);
|
||||
for (int i=0;i<N;i++)
|
||||
A_stl[j][i] = A(i,j);
|
||||
for (int i = 0; i < N; i++) A_stl[j][i] = A(i, j);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
|
||||
{
|
||||
static inline void matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X, int N) {
|
||||
firstIndex i;
|
||||
secondIndex j;
|
||||
thirdIndex k;
|
||||
X = sum(A(i, k) * B(k, j), k);
|
||||
}
|
||||
|
||||
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
static inline void ata_product(const gene_matrix& A, gene_matrix& X, int N) {
|
||||
firstIndex i;
|
||||
secondIndex j;
|
||||
thirdIndex k;
|
||||
X = sum(A(k, i) * A(k, j), k);
|
||||
}
|
||||
|
||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
static inline void aat_product(const gene_matrix& A, gene_matrix& X, int N) {
|
||||
firstIndex i;
|
||||
secondIndex j;
|
||||
thirdIndex k;
|
||||
X = sum(A(i, k) * A(j, k), k);
|
||||
}
|
||||
|
||||
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
// Assigns to X the expression sum(A(r, s) * B(s), s), summing over the
// second index placeholder. N is not used here.
static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
  firstIndex r;   // placeholder of the result entry
  secondIndex s;  // placeholder that is summed over
  X = sum(A(r, s) * B(s), s);
}
|
||||
|
||||
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
// Assigns to X the expression sum(A(s, r) * B(s), s): same as the
// matrix-vector product but with A's two indices swapped. N is not used here.
static inline void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
  firstIndex r;   // placeholder of the result entry
  secondIndex s;  // placeholder that is summed over
  X = sum(A(s, r) * B(s), s);
}
|
||||
|
||||
static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N)
|
||||
{
|
||||
static inline void axpy(const real coef, const gene_vector& X, gene_vector& Y, int N) {
|
||||
firstIndex i;
|
||||
Y = Y(i) + coef * X(i);
|
||||
// Y += coef * X;
|
||||
@ -141,7 +132,6 @@ public :
|
||||
// cible.template operator=<gene_vector>(source);
|
||||
cible = source;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -31,9 +31,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
int main() {
|
||||
bench<Action_matrix_vector_product<blitz_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
|
||||
bench<Action_atv_product<blitz_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
|
||||
|
||||
@ -47,5 +45,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -26,13 +26,10 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
// Runs the fixed-size benchmarks for tiny_blitz_interface, in this order:
// axpy, matrix-matrix product, matrix-vector product.
int main() {
  bench_static<Action_axpy, tiny_blitz_interface>();
  bench_static<Action_matrix_matrix_product, tiny_blitz_interface>();
  bench_static<Action_matrix_vector_product, tiny_blitz_interface>();

  return 0;
}
|
||||
|
||||
|
||||
|
@ -31,11 +31,8 @@
|
||||
BZ_USING_NAMESPACE(blitz)
|
||||
|
||||
template <class real, int SIZE>
|
||||
class tiny_blitz_interface
|
||||
{
|
||||
|
||||
class tiny_blitz_interface {
|
||||
public:
|
||||
|
||||
typedef real real_type;
|
||||
|
||||
typedef std::vector<real> stl_vector;
|
||||
@ -52,34 +49,28 @@ public :
|
||||
|
||||
// Copies A_stl into A element by element. A_stl[c][r] is written to A(r, c),
// so each inner vector of A_stl supplies one column of A.
static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
  for (int c = 0; c < A_stl.size(); c++) {
    for (int r = 0; r < A_stl[c].size(); r++) {
      A(r, c) = A_stl[c][r];
    }
  }
}
|
||||
|
||||
// Copies every entry of B_stl into the same position of B.
static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) {
  for (int k = 0; k < B_stl.size(); k++) {
    B(k) = B_stl[k];
  }
}
|
||||
|
||||
// Copies B into B_stl entry by entry. B_stl is not resized: exactly
// B_stl.size() entries are written.
static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) {
  for (int k = 0; k < B_stl.size(); k++) {
    B_stl[k] = B(k);
  }
}
|
||||
|
||||
// Copies A into A_stl. The dimension is taken from A_stl.size(); each inner
// vector is resized to that dimension and A(r, c) is stored at A_stl[c][r].
static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
  int dim = A_stl.size();
  for (int c = 0; c < dim; c++) {
    A_stl[c].resize(dim);
    for (int r = 0; r < dim; r++) {
      A_stl[c][r] = A(r, c);
    }
  }
}
|
||||
|
||||
static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) {
|
||||
for (int j = 0; j < N; j++)
|
||||
for (int i=0;i<N;i++)
|
||||
cible(i,j) = source(i,j);
|
||||
for (int i = 0; i < N; i++) cible(i, j) = source(i, j);
|
||||
}
|
||||
|
||||
static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) {
|
||||
@ -92,15 +83,9 @@ public :
|
||||
X = product(A, B);
|
||||
}
|
||||
|
||||
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||
X = product(A,B);
|
||||
}
|
||||
|
||||
static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||
Y += coef * X;
|
||||
}
|
||||
// Assigns product(A, B) to X. The size argument N is not used here.
static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
  X = product(A, B);
}
|
||||
|
||||
// In-place update Y += coef * X. The size argument N is not used here.
static inline void axpy(const real coef, const gene_vector& X, gene_vector& Y, int N) {
  Y += coef * X;
}
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -30,9 +30,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
int main() {
|
||||
bench_static<Action_axpy, eigen2_interface>();
|
||||
bench_static<Action_matrix_matrix_product, eigen2_interface>();
|
||||
bench_static<Action_matrix_vector_product, eigen2_interface>();
|
||||
@ -42,5 +40,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -28,11 +28,8 @@
|
||||
using namespace Eigen;
|
||||
|
||||
template <class real, int SIZE = Dynamic>
|
||||
class eigen2_interface
|
||||
{
|
||||
|
||||
class eigen2_interface {
|
||||
public:
|
||||
|
||||
enum { IsFixedSize = (SIZE != Dynamic) };
|
||||
|
||||
typedef real real_type;
|
||||
@ -43,14 +40,22 @@ public :
|
||||
typedef Eigen::Matrix<real, SIZE, SIZE> gene_matrix;
|
||||
typedef Eigen::Matrix<real, SIZE, 1> gene_vector;
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
static inline std::string name(void) {
|
||||
#if defined(EIGEN_VECTORIZE_SSE)
|
||||
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
|
||||
if (SIZE == Dynamic)
|
||||
return "eigen2";
|
||||
else
|
||||
return "tiny_eigen2";
|
||||
#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
|
||||
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
|
||||
if (SIZE == Dynamic)
|
||||
return "eigen2";
|
||||
else
|
||||
return "tiny_eigen2";
|
||||
#else
|
||||
if (SIZE==Dynamic) return "eigen2_novec"; else return "tiny_eigen2_novec";
|
||||
if (SIZE == Dynamic)
|
||||
return "eigen2_novec";
|
||||
else
|
||||
return "tiny_eigen2_novec";
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -97,17 +102,14 @@ public :
|
||||
X = (A * B).lazy();
|
||||
}
|
||||
|
||||
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||
// Assigns (A.transpose() * B.transpose()).lazy() to X. N is not used here.
static inline void transposed_matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X,
                                                    int N) {
  X = (A.transpose() * B.transpose()).lazy();
}
|
||||
|
||||
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
X = (A.transpose()*A).lazy();
|
||||
}
|
||||
// Assigns (A.transpose() * A).lazy() to X. N is not used here.
static inline void ata_product(const gene_matrix& A, gene_matrix& X, int N) {
  X = (A.transpose() * A).lazy();
}
|
||||
|
||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
X = (A*A.transpose()).lazy();
|
||||
}
|
||||
// Assigns (A * A.transpose()).lazy() to X. N is not used here.
static inline void aat_product(const gene_matrix& A, gene_matrix& X, int N) {
  X = (A * A.transpose()).lazy();
}
|
||||
|
||||
static inline void matrix_vector_product(const gene_matrix& A, const gene_vector& B, gene_vector& X, int N) {
|
||||
X = (A * B) /*.lazy()*/;
|
||||
@ -117,21 +119,13 @@ public :
|
||||
X = (A.transpose() * B) /*.lazy()*/;
|
||||
}
|
||||
|
||||
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||
Y += coef * X;
|
||||
}
|
||||
// In-place update Y += coef * X. The size argument N is not used here.
static inline void axpy(real coef, const gene_vector& X, gene_vector& Y, int N) {
  Y += coef * X;
}
|
||||
|
||||
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||
Y = a*X + b*Y;
|
||||
}
|
||||
// Overwrites Y with a * X + b * Y. The size argument N is not used here.
static inline void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int N) {
  Y = a * X + b * Y;
}
|
||||
|
||||
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
||||
cible = source;
|
||||
}
|
||||
// Assigns `source` to `cible` (the destination). N is not used here.
static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) {
  cible = source;
}
|
||||
|
||||
static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
|
||||
cible = source;
|
||||
}
|
||||
// Assigns `source` to `cible` (the destination). N is not used here.
static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) {
  cible = source;
}
|
||||
|
||||
static inline void trisolve_lower(const gene_matrix& L, const gene_vector& B, gene_vector& X, int N) {
|
||||
X = L.template marked<LowerTriangular>().solveTriangular(B);
|
||||
@ -160,9 +154,6 @@ public :
|
||||
static inline void hessenberg(const gene_matrix& X, gene_matrix& C, int N) {
|
||||
C = HessenbergDecomposition<gene_matrix>(X).packedMatrix();
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -27,8 +27,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
|
||||
bench<Action_trisolve_matrix<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
|
||||
bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
|
||||
@ -40,5 +39,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -22,13 +22,9 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
// Runs the axpy and axpby benchmarks for eigen2_interface over the
// [MIN_AXPY, MAX_AXPY] range with NB_POINT as the third argument.
int main() {
  bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
  bench<Action_axpby<eigen2_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

  return 0;
}
|
||||
|
||||
|
||||
|
@ -22,8 +22,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
|
||||
// bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
|
||||
@ -31,5 +30,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -22,8 +22,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
|
||||
bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
|
||||
// bench<Action_symv<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||
@ -32,5 +31,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -30,9 +30,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
int main() {
|
||||
bench_static<Action_axpy, eigen2_interface>();
|
||||
bench_static<Action_matrix_matrix_product, eigen2_interface>();
|
||||
bench_static<Action_matrix_vector_product, eigen2_interface>();
|
||||
@ -42,5 +40,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -25,11 +25,8 @@
|
||||
using namespace Eigen;
|
||||
|
||||
template <class real, int SIZE = Dynamic>
|
||||
class eigen3_interface
|
||||
{
|
||||
|
||||
class eigen3_interface {
|
||||
public:
|
||||
|
||||
enum { IsFixedSize = (SIZE != Dynamic) };
|
||||
|
||||
typedef real real_type;
|
||||
@ -40,10 +37,7 @@ public :
|
||||
typedef Eigen::Matrix<real, SIZE, SIZE> gene_matrix;
|
||||
typedef Eigen::Matrix<real, SIZE, 1> gene_vector;
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return EIGEN_MAKESTRING(BTL_PREFIX);
|
||||
}
|
||||
// Returns the stringified BTL_PREFIX macro as the label of this backend.
static inline std::string name(void) {
  return EIGEN_MAKESTRING(BTL_PREFIX);
}
|
||||
|
||||
// Intentionally a no-op: both parameters are unnamed and nothing is released.
static void free_matrix(gene_matrix& /*A*/, int /*N*/) {}
|
||||
|
||||
@ -88,7 +82,8 @@ public :
|
||||
X.noalias() = A * B;
|
||||
}
|
||||
|
||||
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int /*N*/){
|
||||
static inline void transposed_matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X,
|
||||
int /*N*/) {
|
||||
X.noalias() = A.transpose() * B.transpose();
|
||||
}
|
||||
|
||||
@ -109,17 +104,17 @@ public :
|
||||
|
||||
// Writes A.selfadjointView<Lower>() * B into X through noalias().
static inline void symv(const gene_matrix& A, const gene_vector& B, gene_vector& X, int /*N*/) {
  // Disabled alternative kept for reference:
  // internal::product_selfadjoint_vector<real,0,LowerTriangularBit,false,false>(N,A.data(),N, B.data(), 1,
  // X.data(), 1);
  X.noalias() = A.template selfadjointView<Lower>() * B;
}
|
||||
|
||||
template<typename Dest, typename Src> static void triassign(Dest& dst, const Src& src)
|
||||
{
|
||||
template <typename Dest, typename Src>
|
||||
static void triassign(Dest& dst, const Src& src) {
|
||||
typedef typename Dest::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<Scalar>::type Packet;
|
||||
const int PacketSize = sizeof(Packet) / sizeof(Scalar);
|
||||
int size = dst.cols();
|
||||
for(int j=0; j<size; j+=1)
|
||||
{
|
||||
for (int j = 0; j < size; j += 1) {
|
||||
// const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
|
||||
Scalar* A0 = dst.data() + j * dst.stride();
|
||||
int starti = j;
|
||||
@ -128,8 +123,7 @@ public :
|
||||
alignedEnd = alignedStart + ((size - alignedStart) / (2 * PacketSize)) * (PacketSize * 2);
|
||||
|
||||
// do the non-vectorizable part of the assignment
|
||||
for (int index = starti; index<alignedStart ; ++index)
|
||||
{
|
||||
for (int index = starti; index < alignedStart; ++index) {
|
||||
if (Dest::Flags & RowMajorBit)
|
||||
dst.copyCoeff(j, index, src);
|
||||
else
|
||||
@ -137,8 +131,7 @@ public :
|
||||
}
|
||||
|
||||
// do the vectorizable part of the assignment
|
||||
for (int index = alignedStart; index<alignedEnd; index+=PacketSize)
|
||||
{
|
||||
for (int index = alignedStart; index < alignedEnd; index += PacketSize) {
|
||||
if (Dest::Flags & RowMajorBit)
|
||||
dst.template copyPacket<Src, Aligned, Unaligned>(j, index, src);
|
||||
else
|
||||
@ -146,8 +139,7 @@ public :
|
||||
}
|
||||
|
||||
// do the non-vectorizable part of the assignment
|
||||
for (int index = alignedEnd; index<size; ++index)
|
||||
{
|
||||
for (int index = alignedEnd; index < size; ++index) {
|
||||
if (Dest::Flags & RowMajorBit)
|
||||
dst.copyCoeff(j, index, src);
|
||||
else
|
||||
@ -158,14 +150,13 @@ public :
|
||||
}
|
||||
|
||||
// For each column j of A, adds X[j] * Y.tail(N - j) + Y[j] * X.tail(N - j) to
// the last N - j entries of that column, so only entries from the diagonal
// downwards are updated.
static EIGEN_DONT_INLINE void syr2(gene_matrix& A, gene_vector& X, gene_vector& Y, int N) {
  // Disabled alternative kept for reference:
  // internal::product_selfadjoint_rank2_update<real,0,LowerTriangularBit>(N,A.data(),N, X.data(), 1, Y.data(), 1,
  // -1);
  for (int j = 0; j < N; ++j) {
    A.col(j).tail(N - j) += X[j] * Y.tail(N - j) + Y[j] * X.tail(N - j);
  }
}
|
||||
|
||||
// Adds X * Y[c] to column c of A, for every column index c in [0, N).
static EIGEN_DONT_INLINE void ger(gene_matrix& A, gene_vector& X, gene_vector& Y, int N) {
  for (int c = 0; c < N; ++c) {
    A.col(c) += X * Y[c];
  }
}
|
||||
|
||||
static EIGEN_DONT_INLINE void rot(gene_vector& A, gene_vector& B, real c, real s, int /*N*/) {
|
||||
@ -176,13 +167,9 @@ public :
|
||||
X.noalias() = (A.transpose() * B);
|
||||
}
|
||||
|
||||
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int /*N*/){
|
||||
Y += coef * X;
|
||||
}
|
||||
// In-place update Y += coef * X. The size parameter is unnamed and unused.
static inline void axpy(real coef, const gene_vector& X, gene_vector& Y, int /*N*/) {
  Y += coef * X;
}
|
||||
|
||||
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int /*N*/){
|
||||
Y = a*X + b*Y;
|
||||
}
|
||||
// Overwrites Y with a * X + b * Y. The size parameter is unnamed and unused.
static inline void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int /*N*/) {
  Y = a * X + b * Y;
}
|
||||
|
||||
static EIGEN_DONT_INLINE void copy_matrix(const gene_matrix& source, gene_matrix& cible, int /*N*/) {
|
||||
cible = source;
|
||||
@ -213,9 +200,7 @@ public :
|
||||
// Cholesky<gene_matrix>::computeInPlaceBlock(C);
|
||||
}
|
||||
|
||||
static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int /*N*/){
|
||||
C = X.fullPivLu().matrixLU();
|
||||
}
|
||||
// Stores X.fullPivLu().matrixLU() in C. The size parameter is unnamed and
// unused.
static inline void lu_decomp(const gene_matrix& X, gene_matrix& C, int /*N*/) {
  C = X.fullPivLu().matrixLU();
}
|
||||
|
||||
static inline void partial_lu_decomp(const gene_matrix& X, gene_matrix& C, int N) {
|
||||
Matrix<DenseIndex, 1, Dynamic> piv(N);
|
||||
@ -234,9 +219,6 @@ public :
|
||||
static inline void hessenberg(const gene_matrix& X, gene_matrix& C, int /*N*/) {
|
||||
C = HessenbergDecomposition<gene_matrix>(X).packedMatrix();
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -27,8 +27,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
bench<Action_trisolve<eigen3_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);
|
||||
bench<Action_trisolve_matrix<eigen3_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);
|
||||
bench<Action_cholesky<eigen3_interface<REAL_TYPE> > >(MIN_LU, MAX_LU, NB_POINT);
|
||||
@ -40,5 +39,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -22,14 +22,10 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
// Runs the axpy, axpby and rot benchmarks for eigen3_interface over the
// [MIN_AXPY, MAX_AXPY] range with NB_POINT as the third argument.
int main() {
  bench<Action_axpy<eigen3_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
  bench<Action_axpby<eigen3_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
  bench<Action_rot<eigen3_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);

  return 0;
}
|
||||
|
||||
|
||||
|
@ -22,8 +22,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
bench<Action_matrix_matrix_product<eigen3_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
|
||||
bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
|
||||
bench<Action_aat_product<eigen3_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
|
||||
@ -31,5 +30,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -22,8 +22,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
bench<Action_matrix_vector_product<eigen3_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
|
||||
bench<Action_atv_product<eigen3_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
|
||||
bench<Action_symv<eigen3_interface<REAL_TYPE> > >(MIN_MV, MAX_MV, NB_POINT);
|
||||
@ -32,5 +31,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -26,67 +26,40 @@
|
||||
BZ_USING_NAMESPACE(blitz)
|
||||
|
||||
template <class real>
|
||||
class blitz_LU_solve_interface : public blitz_interface<real>
|
||||
{
|
||||
|
||||
class blitz_LU_solve_interface : public blitz_interface<real> {
|
||||
public:
|
||||
|
||||
typedef typename blitz_interface<real>::gene_matrix gene_matrix;
|
||||
typedef typename blitz_interface<real>::gene_vector gene_vector;
|
||||
|
||||
typedef blitz::Array<int, 1> Pivot_Vector;
|
||||
|
||||
inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N)
|
||||
{
|
||||
// Resizes `pivot` to N entries.
inline static void new_Pivot_Vector(Pivot_Vector &pivot, int N) {
  pivot.resize(N);
}
|
||||
|
||||
pivot.resize(N);
|
||||
|
||||
}
|
||||
|
||||
inline static void free_Pivot_Vector(Pivot_Vector & pivot)
|
||||
{
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end)
|
||||
{
|
||||
// Intentionally a no-op: nothing is released here for `pivot`.
inline static void free_Pivot_Vector(Pivot_Vector &pivot) {}
|
||||
|
||||
// Returns the sum of A(row, k) * B(k) for k running from col_start to
// col_end, both bounds included. Returns 0 when the range is empty.
static inline real matrix_vector_product_sliced(const gene_matrix &A, gene_vector B, int row, int col_start,
                                                int col_end) {
  real acc = 0.;
  for (int k = col_start; k <= col_end; ++k) {
    acc += A(row, k) * B(k);
  }
  return acc;
}
|
||||
|
||||
|
||||
|
||||
|
||||
static inline real matrix_matrix_product_sliced(gene_matrix & A, int row, int col_start, int col_end, gene_matrix & B, int row_shift, int col )
|
||||
{
|
||||
|
||||
// Returns the sum of A(row, k) * B(k + row_shift, col) for k running from
// col_start to col_end, both bounds included. Returns 0 for an empty range.
static inline real matrix_matrix_product_sliced(gene_matrix &A, int row, int col_start, int col_end, gene_matrix &B,
                                                int row_shift, int col) {
  real acc = 0.;
  for (int k = col_start; k <= col_end; ++k) {
    acc += A(row, k) * B(k + row_shift, col);
  }
  return acc;
}
|
||||
|
||||
inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
|
||||
{
|
||||
|
||||
inline static void LU_factor(gene_matrix &LU, Pivot_Vector &pivot, int N) {
|
||||
ASSERT(LU.rows() == LU.cols());
|
||||
int index_max = 0;
|
||||
real big = 0.;
|
||||
@ -144,12 +117,9 @@ public :
|
||||
for (int i = j + 1; i < N; i++) LU(i, j) *= dum;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N)
|
||||
{
|
||||
|
||||
inline static void LU_solve(const gene_matrix &LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N) {
|
||||
// Pour conserver le meme header, on travaille sur X, copie du second-membre B
|
||||
X = B.copy();
|
||||
ASSERT(LU.rows() == LU.cols());
|
||||
@ -184,9 +154,7 @@ public :
|
||||
X(i) = theSum / LU(i, i);
|
||||
// B( i ) = theSum/LU( i, i ) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -25,9 +25,7 @@ using namespace gmm;
|
||||
|
||||
template <class real>
|
||||
class gmm_interface {
|
||||
|
||||
public:
|
||||
|
||||
typedef real real_type;
|
||||
|
||||
typedef std::vector<real> stl_vector;
|
||||
@ -36,18 +34,11 @@ public :
|
||||
typedef gmm::dense_matrix<real> gene_matrix;
|
||||
typedef stl_vector gene_vector;
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "gmm";
|
||||
}
|
||||
// Returns the label of this backend, "gmm".
static inline std::string name(void) {
  return std::string("gmm");
}
|
||||
|
||||
static void free_matrix(gene_matrix & A, int N){
|
||||
return ;
|
||||
}
|
||||
// Intentionally a no-op: neither A nor N is used and nothing is released.
static void free_matrix(gene_matrix& A, int N) {}
|
||||
|
||||
static void free_vector(gene_vector & B){
|
||||
return ;
|
||||
}
|
||||
// Intentionally a no-op: B is not used and nothing is released.
static void free_vector(gene_vector& B) {}
|
||||
|
||||
static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
|
||||
A.resize(A_stl[0].size(), A_stl.size());
|
||||
@ -59,13 +50,9 @@ public :
|
||||
}
|
||||
}
|
||||
|
||||
static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
|
||||
B = B_stl;
|
||||
}
|
||||
// Assigns B_stl to B.
static inline void vector_from_stl(gene_vector& B, stl_vector& B_stl) {
  B = B_stl;
}
|
||||
|
||||
static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
|
||||
B_stl = B;
|
||||
}
|
||||
// Assigns B to B_stl.
static inline void vector_to_stl(gene_vector& B, stl_vector& B_stl) {
  B_stl = B;
}
|
||||
|
||||
static inline void matrix_to_stl(gene_matrix& A, stl_matrix& A_stl) {
|
||||
int N = A_stl.size();
|
||||
@ -82,17 +69,14 @@ public :
|
||||
gmm::mult(A, B, X);
|
||||
}
|
||||
|
||||
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||
// Calls gmm::mult with transposed(A) and transposed(B) as operands and X as
// the last (destination) argument. N is not used here.
static inline void transposed_matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X,
                                                    int N) {
  gmm::mult(gmm::transposed(A), gmm::transposed(B), X);
}
|
||||
|
||||
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
gmm::mult(gmm::transposed(A),A, X);
|
||||
}
|
||||
// Calls gmm::mult with transposed(A) and A as operands and X as the last
// (destination) argument. N is not used here.
static inline void ata_product(const gene_matrix& A, gene_matrix& X, int N) {
  gmm::mult(gmm::transposed(A), A, X);
}
|
||||
|
||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
gmm::mult(A,gmm::transposed(A), X);
|
||||
}
|
||||
// Calls gmm::mult with A and transposed(A) as operands and X as the last
// (destination) argument. N is not used here.
static inline void aat_product(const gene_matrix& A, gene_matrix& X, int N) {
  gmm::mult(A, gmm::transposed(A), X);
}
|
||||
|
||||
static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) {
|
||||
gmm::mult(A, B, X);
|
||||
@ -110,13 +94,9 @@ public :
|
||||
gmm::add(gmm::scaled(X, a), gmm::scaled(Y, b), Y);
|
||||
}
|
||||
|
||||
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
||||
gmm::copy(source,cible);
|
||||
}
|
||||
// Calls gmm::copy(source, cible); `cible` is the destination. N is not used
// here.
static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) {
  gmm::copy(source, cible);
}
|
||||
|
||||
static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
|
||||
gmm::copy(source,cible);
|
||||
}
|
||||
// Calls gmm::copy(source, cible); `cible` is the destination. N is not used
// here.
static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) {
  gmm::copy(source, cible);
}
|
||||
|
||||
static inline void trisolve_lower(const gene_matrix& L, const gene_vector& B, gene_vector& X, int N) {
|
||||
gmm::copy(B, X);
|
||||
@ -138,7 +118,6 @@ public :
|
||||
gmm::copy(X, R);
|
||||
gmm::Householder_tridiagonalization(R, X, false);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -24,9 +24,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
int main() {
|
||||
bench<Action_axpy<gmm_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
bench<Action_axpby<gmm_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
|
||||
@ -47,5 +45,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -24,9 +24,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
int main() {
|
||||
bench<Action_axpy<mtl4_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
bench<Action_axpby<mtl4_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
|
||||
@ -42,5 +40,3 @@ int main()
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -26,67 +26,40 @@
|
||||
BZ_USING_NAMESPACE(blitz)
|
||||
|
||||
template <class real>
|
||||
class blitz_LU_solve_interface : public blitz_interface<real>
|
||||
{
|
||||
|
||||
class blitz_LU_solve_interface : public blitz_interface<real> {
|
||||
public:
|
||||
|
||||
typedef typename blitz_interface<real>::gene_matrix gene_matrix;
|
||||
typedef typename blitz_interface<real>::gene_vector gene_vector;
|
||||
|
||||
typedef blitz::Array<int, 1> Pivot_Vector;
|
||||
|
||||
inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N)
|
||||
{
|
||||
// Resizes `pivot` to N entries.
inline static void new_Pivot_Vector(Pivot_Vector &pivot, int N) {
  pivot.resize(N);
}
|
||||
|
||||
pivot.resize(N);
|
||||
|
||||
}
|
||||
|
||||
inline static void free_Pivot_Vector(Pivot_Vector & pivot)
|
||||
{
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end)
|
||||
{
|
||||
// Intentionally a no-op: nothing is released here for `pivot`.
inline static void free_Pivot_Vector(Pivot_Vector &pivot) {}
|
||||
|
||||
// Returns the sum of A(row, k) * B(k) for k running from col_start to
// col_end, both bounds included. Returns 0 when the range is empty.
static inline real matrix_vector_product_sliced(const gene_matrix &A, gene_vector B, int row, int col_start,
                                                int col_end) {
  real acc = 0.;
  for (int k = col_start; k <= col_end; ++k) {
    acc += A(row, k) * B(k);
  }
  return acc;
}
|
||||
|
||||
|
||||
|
||||
|
||||
static inline real matrix_matrix_product_sliced(gene_matrix & A, int row, int col_start, int col_end, gene_matrix & B, int row_shift, int col )
|
||||
{
|
||||
|
||||
// Returns the sum of A(row, k) * B(k + row_shift, col) for k running from
// col_start to col_end, both bounds included. Returns 0 for an empty range.
static inline real matrix_matrix_product_sliced(gene_matrix &A, int row, int col_start, int col_end, gene_matrix &B,
                                                int row_shift, int col) {
  real acc = 0.;
  for (int k = col_start; k <= col_end; ++k) {
    acc += A(row, k) * B(k + row_shift, col);
  }
  return acc;
}
|
||||
|
||||
inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
|
||||
{
|
||||
|
||||
inline static void LU_factor(gene_matrix &LU, Pivot_Vector &pivot, int N) {
|
||||
ASSERT(LU.rows() == LU.cols());
|
||||
int index_max = 0;
|
||||
real big = 0.;
|
||||
@ -144,12 +117,9 @@ public :
|
||||
for (int i = j + 1; i < N; i++) LU(i, j) *= dum;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N)
|
||||
{
|
||||
|
||||
inline static void LU_solve(const gene_matrix &LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N) {
|
||||
// Pour conserver le meme header, on travaille sur X, copie du second-membre B
|
||||
X = B.copy();
|
||||
ASSERT(LU.rows() == LU.cols());
|
||||
@ -184,9 +154,7 @@ public :
|
||||
X(i) = theSum / LU(i, i);
|
||||
// B( i ) = theSum/LU( i, i ) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -27,9 +27,7 @@ using namespace mtl;
|
||||
|
||||
template <class real>
|
||||
class mtl4_interface {
|
||||
|
||||
public:
|
||||
|
||||
typedef real real_type;
|
||||
|
||||
typedef std::vector<real> stl_vector;
|
||||
@ -40,13 +38,9 @@ public :
|
||||
|
||||
static inline std::string name() { return "mtl4"; }
|
||||
|
||||
static void free_matrix(gene_matrix & A, int N){
|
||||
return ;
|
||||
}
|
||||
static void free_matrix(gene_matrix& A, int N) { return; }
|
||||
|
||||
static void free_vector(gene_vector & B){
|
||||
return ;
|
||||
}
|
||||
static void free_vector(gene_vector& B) { return; }
|
||||
|
||||
static inline void matrix_from_stl(gene_matrix& A, stl_matrix& A_stl) {
|
||||
A.change_dim(A_stl[0].size(), A_stl.size());
|
||||
@ -88,7 +82,8 @@ public :
|
||||
// X = (A*C);
|
||||
}
|
||||
|
||||
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||
static inline void transposed_matrix_matrix_product(const gene_matrix& A, const gene_matrix& B, gene_matrix& X,
|
||||
int N) {
|
||||
X = (trans(A) * trans(B));
|
||||
}
|
||||
|
||||
@ -96,25 +91,15 @@ public :
|
||||
// X = (trans(A)*A);
|
||||
// }
|
||||
|
||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
X = (A*trans(A));
|
||||
}
|
||||
static inline void aat_product(const gene_matrix& A, gene_matrix& X, int N) { X = (A * trans(A)); }
|
||||
|
||||
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||
X = (A*B);
|
||||
}
|
||||
static inline void matrix_vector_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) { X = (A * B); }
|
||||
|
||||
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||
X = (trans(A)*B);
|
||||
}
|
||||
static inline void atv_product(gene_matrix& A, gene_vector& B, gene_vector& X, int N) { X = (trans(A) * B); }
|
||||
|
||||
static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||
Y += coef * X;
|
||||
}
|
||||
static inline void axpy(const real coef, const gene_vector& X, gene_vector& Y, int N) { Y += coef * X; }
|
||||
|
||||
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||
Y = a*X + b*Y;
|
||||
}
|
||||
static inline void axpby(real a, const gene_vector& X, real b, gene_vector& Y, int N) { Y = a * X + b * Y; }
|
||||
|
||||
// static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
|
||||
// C = X;
|
||||
@ -131,14 +116,9 @@ public :
|
||||
X = lower_trisolve(L, B);
|
||||
}
|
||||
|
||||
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
||||
cible = source;
|
||||
}
|
||||
|
||||
static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
|
||||
cible = source;
|
||||
}
|
||||
static inline void copy_matrix(const gene_matrix& source, gene_matrix& cible, int N) { cible = source; }
|
||||
|
||||
static inline void copy_vector(const gene_vector& source, gene_vector& cible, int N) { cible = source; }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -14,8 +14,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
bench<Action_axpy<tensor_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
bench<Action_axpby<tensor_interface<REAL_TYPE> > >(MIN_AXPY, MAX_AXPY, NB_POINT);
|
||||
|
||||
|
@ -13,8 +13,7 @@
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
int main() {
|
||||
bench<Action_matrix_matrix_product<tensor_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT);
|
||||
|
||||
return 0;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user