Clang-format tests, examples, libraries, benchmarks, etc.

This commit is contained in:
Antonio Sánchez 2023-12-05 21:22:55 +00:00 committed by Rasmus Munk Larsen
parent 3252ecc7a4
commit 46e9cdb7fe
876 changed files with 33453 additions and 37795 deletions

View File

@ -20,63 +20,51 @@ using namespace Eigen;
#endif
typedef SCALAR Scalar;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
typedef Matrix<Scalar, Dynamic, 1> DenseVector;
typedef SparseMatrix<Scalar> EigenSparseMatrix;
void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst)
{
dst.reserve(double(rows)*cols*density);
for(int j = 0; j < cols; j++)
{
for(int i = 0; i < rows; i++)
{
Scalar v = (internal::random<float>(0,1) < density) ? internal::random<Scalar>() : 0;
if (v!=0)
dst.insert(i,j) = v;
void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst) {
dst.reserve(double(rows) * cols * density);
for (int j = 0; j < cols; j++) {
for (int i = 0; i < rows; i++) {
Scalar v = (internal::random<float>(0, 1) < density) ? internal::random<Scalar>() : 0;
if (v != 0) dst.insert(i, j) = v;
}
}
dst.finalize();
}
void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst)
{
// std::cout << "alloc " << nnzPerCol*cols << "\n";
dst.reserve(nnzPerCol*cols);
for(int j = 0; j < cols; j++)
{
void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst) {
// std::cout << "alloc " << nnzPerCol*cols << "\n";
dst.reserve(nnzPerCol * cols);
for (int j = 0; j < cols; j++) {
std::set<int> aux;
for(int i = 0; i < nnzPerCol; i++)
{
int k = internal::random<int>(0,rows-1);
while (aux.find(k)!=aux.end())
k = internal::random<int>(0,rows-1);
for (int i = 0; i < nnzPerCol; i++) {
int k = internal::random<int>(0, rows - 1);
while (aux.find(k) != aux.end()) k = internal::random<int>(0, rows - 1);
aux.insert(k);
dst.insert(k,j) = internal::random<Scalar>();
dst.insert(k, j) = internal::random<Scalar>();
}
}
dst.finalize();
}
void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst)
{
void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst) {
dst.setZero();
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
dst(it.index(),j) = it.value();
for (int j = 0; j < src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value();
}
#ifndef NOGMM
#include "gmm/gmm.h"
typedef gmm::csc_matrix<Scalar> GmmSparse;
typedef gmm::col_matrix< gmm::wsvector<Scalar> > GmmDynSparse;
void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst)
{
typedef gmm::col_matrix<gmm::wsvector<Scalar> > GmmDynSparse;
void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst) {
GmmDynSparse tmp(src.rows(), src.cols());
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
tmp(it.index(),j) = it.value();
for (int j = 0; j < src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) tmp(it.index(), j) = it.value();
gmm::copy(tmp, dst);
}
#endif
@ -85,12 +73,10 @@ void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst)
#include <boost/numeric/mtl/mtl.hpp>
typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::col_major> > MtlSparse;
typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::row_major> > MtlSparseRowMajor;
void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst)
{
void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst) {
mtl::matrix::inserter<MtlSparse> ins(dst);
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
ins[it.index()][j] = it.value();
for (int j = 0; j < src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) ins[it.index()][j] = it.value();
}
#endif
@ -98,20 +84,18 @@ void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst)
extern "C" {
#include "cs.h"
}
void eiToCSparse(const EigenSparseMatrix& src, cs* &dst)
{
cs* aux = cs_spalloc (0, 0, 1, 1, 1);
for (int j=0; j<src.cols(); ++j)
void eiToCSparse(const EigenSparseMatrix& src, cs*& dst) {
cs* aux = cs_spalloc(0, 0, 1, 1, 1);
for (int j = 0; j < src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
if (!cs_entry(aux, it.index(), j, it.value()))
{
if (!cs_entry(aux, it.index(), j, it.value())) {
std::cout << "cs_entry error\n";
exit(2);
}
dst = cs_compress(aux);
// cs_spfree(aux);
dst = cs_compress(aux);
// cs_spfree(aux);
}
#endif // CSPARSE
#endif // CSPARSE
#ifndef NOUBLAS
#include <boost/numeric/ublas/vector.hpp>
@ -123,22 +107,18 @@ void eiToCSparse(const EigenSparseMatrix& src, cs* &dst)
#include <boost/numeric/ublas/vector_of_vector.hpp>
#include <boost/numeric/ublas/operation.hpp>
typedef boost::numeric::ublas::compressed_matrix<Scalar,boost::numeric::ublas::column_major> UBlasSparse;
typedef boost::numeric::ublas::compressed_matrix<Scalar, boost::numeric::ublas::column_major> UBlasSparse;
void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst)
{
void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst) {
dst.resize(src.rows(), src.cols(), false);
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
dst(it.index(),j) = it.value();
for (int j = 0; j < src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it) dst(it.index(), j) = it.value();
}
template <typename EigenType, typename UblasType>
void eiToUblasVec(const EigenType& src, UblasType& dst)
{
void eiToUblasVec(const EigenType& src, UblasType& dst) {
dst.resize(src.size());
for (int j=0; j<src.size(); ++j)
dst[j] = src.coeff(j);
for (int j = 0; j < src.size(); ++j) dst[j] = src.coeff(j);
}
#endif

View File

@ -12,19 +12,19 @@
#define EIGEN_BENCH_TIMERR_H
#if defined(_WIN32) || defined(__CYGWIN__)
# ifndef NOMINMAX
# define NOMINMAX
# define EIGEN_BT_UNDEF_NOMINMAX
# endif
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
# endif
# include <windows.h>
#ifndef NOMINMAX
#define NOMINMAX
#define EIGEN_BT_UNDEF_NOMINMAX
#endif
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#else
# include <unistd.h>
#include <unistd.h>
#endif
static void escape(void *p) {
@ -41,27 +41,20 @@ static void clobber() {
#include <Eigen/Core>
namespace Eigen
{
namespace Eigen {
enum {
CPU_TIMER = 0,
REAL_TIMER = 1
};
enum { CPU_TIMER = 0, REAL_TIMER = 1 };
/** Elapsed time timer keeping the best try.
*
* On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID.
* On Windows we use QueryPerformanceCounter
*
* Important: on linux, you must link with -lrt
*/
class BenchTimer
{
public:
BenchTimer()
{
*
* On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID.
* On Windows we use QueryPerformanceCounter
*
* Important: on linux, you must link with -lrt
*/
class BenchTimer {
public:
BenchTimer() {
#if defined(_WIN32) || defined(__CYGWIN__)
LARGE_INTEGER freq;
QueryPerformanceFrequency(&freq);
@ -72,69 +65,53 @@ public:
~BenchTimer() {}
inline void reset()
{
inline void reset() {
  // Accumulated totals restart from zero.
  m_totals.setZero();
  // Worst times start at 0 and best times at a large sentinel (1e9), so the
  // min/max updates performed in stop() replace them with measured values.
  m_worsts.fill(0);
  m_bests.fill(1e9);
}
inline void start()
{
m_starts[CPU_TIMER] = getCpuTime();
// Begins a measurement: stores the current CPU-time and real-time readings in
// m_starts. stop() later subtracts these readings to obtain the elapsed times.
inline void start() {
m_starts[CPU_TIMER] = getCpuTime();
m_starts[REAL_TIMER] = getRealTime();
}
inline void stop()
{
inline void stop() {
m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER];
m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER];
#if EIGEN_VERSION_AT_LEAST(2,90,0)
#if EIGEN_VERSION_AT_LEAST(2, 90, 0)
m_bests = m_bests.cwiseMin(m_times);
m_worsts = m_worsts.cwiseMax(m_times);
#else
m_bests(0) = std::min(m_bests(0),m_times(0));
m_bests(1) = std::min(m_bests(1),m_times(1));
m_worsts(0) = std::max(m_worsts(0),m_times(0));
m_worsts(1) = std::max(m_worsts(1),m_times(1));
#endif
#else
m_bests(0) = std::min(m_bests(0), m_times(0));
m_bests(1) = std::min(m_bests(1), m_times(1));
m_worsts(0) = std::max(m_worsts(0), m_times(0));
m_worsts(1) = std::max(m_worsts(1), m_times(1));
#endif
m_totals += m_times;
}
/** Return the elapsed time in seconds between the last start/stop pair
*/
inline double value(int TIMER = CPU_TIMER) const
{
return m_times[TIMER];
}
*/
// TIMER selects which reading is returned: CPU_TIMER (the default) or REAL_TIMER.
inline double value(int TIMER = CPU_TIMER) const { return m_times[TIMER]; }
/** Return the best elapsed time in seconds
*/
inline double best(int TIMER = CPU_TIMER) const
{
return m_bests[TIMER];
}
*/
// Smallest time recorded by stop() since the last reset().
// TIMER selects CPU_TIMER (the default) or REAL_TIMER.
inline double best(int TIMER = CPU_TIMER) const { return m_bests[TIMER]; }
/** Return the worst elapsed time in seconds
*/
inline double worst(int TIMER = CPU_TIMER) const
{
return m_worsts[TIMER];
}
*/
// Largest time recorded by stop() since the last reset().
// TIMER selects CPU_TIMER (the default) or REAL_TIMER.
inline double worst(int TIMER = CPU_TIMER) const { return m_worsts[TIMER]; }
/** Return the total elapsed time in seconds.
*/
inline double total(int TIMER = CPU_TIMER) const
{
return m_totals[TIMER];
}
*/
// Sum of all times recorded by stop() since the last reset().
// TIMER selects CPU_TIMER (the default) or REAL_TIMER.
inline double total(int TIMER = CPU_TIMER) const { return m_totals[TIMER]; }
inline double getCpuTime() const
{
inline double getCpuTime() const {
#ifdef _WIN32
LARGE_INTEGER query_ticks;
QueryPerformanceCounter(&query_ticks);
return query_ticks.QuadPart/m_frequency;
return query_ticks.QuadPart / m_frequency;
#elif __APPLE__
return double(mach_absolute_time())*1e-9;
return double(mach_absolute_time()) * 1e-9;
#else
timespec ts;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
@ -142,14 +119,13 @@ public:
#endif
}
inline double getRealTime() const
{
inline double getRealTime() const {
#ifdef _WIN32
SYSTEMTIME st;
GetSystemTime(&st);
return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds;
#elif __APPLE__
return double(mach_absolute_time())*1e-9;
return double(mach_absolute_time()) * 1e-9;
#else
timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
@ -157,7 +133,7 @@ public:
#endif
}
protected:
protected:
#if defined(_WIN32) || defined(__CYGWIN__)
double m_frequency;
#endif
@ -167,33 +143,34 @@ protected:
Vector2d m_worsts;
Vector2d m_totals;
public:
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
};
#define BENCH(TIMER,TRIES,REP,CODE) { \
TIMER.reset(); \
for(int uglyvarname1=0; uglyvarname1<TRIES; ++uglyvarname1){ \
TIMER.start(); \
for(int uglyvarname2=0; uglyvarname2<REP; ++uglyvarname2){ \
CODE; \
} \
TIMER.stop(); \
clobber(); \
} \
/* Benchmark driver: resets TIMER, then performs TRIES measurements. Each
 * measurement runs CODE REP times between TIMER.start() and TIMER.stop(), and
 * is followed by a call to clobber().
 *
 * TIMER, TRIES and REP are parenthesized inside the expansion so that compound
 * expressions (e.g. `n + 1` or `fast ? 4 : 100`) are evaluated as a whole
 * instead of being re-associated with the surrounding `<` comparison.
 * CODE is pasted as a statement and is intentionally left as-is. */
#define BENCH(TIMER, TRIES, REP, CODE)                                     \
  {                                                                        \
    (TIMER).reset();                                                       \
    for (int uglyvarname1 = 0; uglyvarname1 < (TRIES); ++uglyvarname1) {   \
      (TIMER).start();                                                     \
      for (int uglyvarname2 = 0; uglyvarname2 < (REP); ++uglyvarname2) {   \
        CODE;                                                              \
      }                                                                    \
      (TIMER).stop();                                                      \
      clobber();                                                           \
    }                                                                      \
  }
}
} // namespace Eigen
// clean #defined tokens
#ifdef EIGEN_BT_UNDEF_NOMINMAX
# undef EIGEN_BT_UNDEF_NOMINMAX
# undef NOMINMAX
#undef EIGEN_BT_UNDEF_NOMINMAX
#undef NOMINMAX
#endif
#ifdef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
# undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
# undef WIN32_LEAN_AND_MEAN
#undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
#undef WIN32_LEAN_AND_MEAN
#endif
#endif // EIGEN_BENCH_TIMERR_H
#endif // EIGEN_BENCH_TIMERR_H

View File

@ -18,54 +18,52 @@ using namespace Eigen;
#include <boost/preprocessor/punctuation/comma.hpp>
#include <boost/preprocessor/stringize.hpp>
template<typename MatrixType> void initMatrix_random(MatrixType& mat) __attribute__((noinline));
template<typename MatrixType> void initMatrix_random(MatrixType& mat)
{
mat.setRandom();// = MatrixType::random(mat.rows(), mat.cols());
// Fills `mat` in place by calling its setRandom() member.
// Marked noinline so the call is not inlined into the caller.
template <typename MatrixType>
__attribute__((noinline)) void initMatrix_random(MatrixType& mat) {
  mat.setRandom();
}
template<typename MatrixType> void initMatrix_identity(MatrixType& mat) __attribute__((noinline));
template<typename MatrixType> void initMatrix_identity(MatrixType& mat)
{
// Turns `mat` into an identity matrix in place by calling its setIdentity() member.
// Marked noinline so the call is not inlined into the caller.
template <typename MatrixType>
__attribute__((noinline)) void initMatrix_identity(MatrixType& mat) {
  mat.setIdentity();
}
#ifndef __INTEL_COMPILER
#define DISABLE_SSE_EXCEPTIONS() { \
int aux; \
asm( \
"stmxcsr %[aux] \n\t" \
"orl $32832, %[aux] \n\t" \
"ldmxcsr %[aux] \n\t" \
: : [aux] "m" (aux)); \
}
/* Read-modify-write of the SSE control/status register (MXCSR):
 *   stmxcsr stores MXCSR into `aux`, orl sets mask 32832 (0x8040, i.e. bits 6
 *   and 15), ldmxcsr loads the result back into MXCSR.
 * Per the Intel SDM those two bits are DAZ (denormals-are-zero) and FTZ
 * (flush-to-zero).
 *
 * `aux` is written by the asm, so it is declared as an output ("=m") rather
 * than an input-only operand; `volatile` keeps the statement from being
 * dropped, since its real effect (changing MXCSR) is invisible to the compiler. */
#define DISABLE_SSE_EXCEPTIONS()        \
  {                                     \
    int aux;                            \
    asm volatile(                       \
        "stmxcsr %[aux] \n\t"           \
        "orl $32832, %[aux] \n\t"       \
        "ldmxcsr %[aux] \n\t"           \
        : [aux] "=m"(aux));             \
  }
#else
#define DISABLE_SSE_EXCEPTIONS()
#define DISABLE_SSE_EXCEPTIONS()
#endif
#ifdef BENCH_GMM
#include <gmm/gmm.h>
template <typename EigenMatrixType, typename GmmMatrixType>
void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst)
{
dst.resize(src.rows(),src.cols());
for (int j=0; j<src.cols(); ++j)
for (int i=0; i<src.rows(); ++i)
dst(i,j) = src.coeff(i,j);
void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst) {
dst.resize(src.rows(), src.cols());
for (int j = 0; j < src.cols(); ++j)
for (int i = 0; i < src.rows(); ++i) dst(i, j) = src.coeff(i, j);
}
#endif
#ifdef BENCH_GSL
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_eigen.h>
template <typename EigenMatrixType>
void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
{
for (int j=0; j<src.cols(); ++j)
for (int i=0; i<src.rows(); ++i)
gsl_matrix_set(*dst, i, j, src.coeff(i,j));
void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst) {
for (int j = 0; j < src.cols(); ++j)
for (int i = 0; i < src.rows(); ++i) gsl_matrix_set(*dst, i, j, src.coeff(i, j));
}
#endif
@ -73,20 +71,16 @@ void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/vector.hpp>
template <typename EigenMatrixType, typename UblasMatrixType>
void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst)
{
dst.resize(src.rows(),src.cols());
for (int j=0; j<src.cols(); ++j)
for (int i=0; i<src.rows(); ++i)
dst(i,j) = src.coeff(i,j);
void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst) {
dst.resize(src.rows(), src.cols());
for (int j = 0; j < src.cols(); ++j)
for (int i = 0; i < src.rows(); ++i) dst(i, j) = src.coeff(i, j);
}
template <typename EigenType, typename UblasType>
void eiToUblasVec(const EigenType& src, UblasType& dst)
{
void eiToUblasVec(const EigenType& src, UblasType& dst) {
dst.resize(src.size());
for (int j=0; j<src.size(); ++j)
dst[j] = src.coeff(j);
for (int j = 0; j < src.size(); ++j) dst[j] = src.coeff(j);
}
#endif
#endif // EIGEN_BENCH_UTIL_H
#endif // EIGEN_BENCH_UTIL_H

View File

@ -37,20 +37,17 @@ uint8_t log2_pot(size_t x) {
return l;
}
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
{
// Packs the log2_pot() values of k, m and n into a single 16-bit code:
// k's value is shifted left by 8 bits, m's by 4 bits, and n's occupies the low bits.
uint16_t compact_size_triple(size_t k, size_t m, size_t n) {
  const int k_field = log2_pot(k) << 8;
  const int m_field = log2_pot(m) << 4;
  const int n_field = log2_pot(n);
  return k_field | m_field | n_field;
}
// just a helper to store a triple of K,M,N sizes for matrix product
struct size_triple_t
{
struct size_triple_t {
uint16_t k, m, n;
size_triple_t() : k(0), m(0), n(0) {}
size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}
size_triple_t(uint16_t compact)
{
size_triple_t(uint16_t compact) {
k = 1 << ((compact & 0xf00) >> 8);
m = 1 << ((compact & 0x0f0) >> 4);
n = 1 << ((compact & 0x00f) >> 0);
@ -58,35 +55,23 @@ struct size_triple_t
bool is_cubic() const { return k == m && m == n; }
};
ostream& operator<<(ostream& s, const size_triple_t& t)
{
return s << "(" << t.k << ", " << t.m << ", " << t.n << ")";
}
// Streams a size triple as "(k, m, n)" and returns the stream for chaining.
ostream& operator<<(ostream& s, const size_triple_t& t) {
  s << "(" << t.k;
  s << ", " << t.m;
  s << ", " << t.n << ")";
  return s;
}
struct inputfile_entry_t
{
// One record of an input file, as stored in inputfile_t::entries.
struct inputfile_entry_t {
// NOTE(review): presumably the compact 16-bit encoding of the product's (k, m, n) sizes
// (see compact_size_triple) — confirm against the parser.
uint16_t product_size;
// NOTE(review): presumably the compact encoding of a power-of-two block size, used for
// "all pot sizes" files — confirm against the parser.
uint16_t pot_block_size;
// NOTE(review): presumably the explicit (k, m, n) block size, used for "default sizes"
// files — confirm against the parser.
size_triple_t nonpot_block_size;
// Measured performance in GFlop/s for this product size / block size pair.
float gflops;
};
struct inputfile_t
{
enum class type_t {
unknown,
all_pot_sizes,
default_sizes
};
struct inputfile_t {
enum class type_t { unknown, all_pot_sizes, default_sizes };
string filename;
vector<inputfile_entry_t> entries;
type_t type;
inputfile_t(const string& fname)
: filename(fname)
, type(type_t::unknown)
{
inputfile_t(const string& fname) : filename(fname), type(type_t::unknown) {
ifstream stream(filename);
if (!stream.is_open()) {
cerr << "couldn't open input file: " << filename << endl;
@ -111,27 +96,17 @@ struct inputfile_t
type = type_t::default_sizes;
continue;
}
if (type == type_t::unknown) {
continue;
}
switch(type) {
switch (type) {
case type_t::all_pot_sizes: {
unsigned int product_size, block_size;
float gflops;
int sscanf_result =
sscanf(line.c_str(), "%x %x %f",
&product_size,
&block_size,
&gflops);
if (3 != sscanf_result ||
!product_size ||
product_size > 0xfff ||
!block_size ||
block_size > 0xfff ||
!isfinite(gflops))
{
int sscanf_result = sscanf(line.c_str(), "%x %x %f", &product_size, &block_size, &gflops);
if (3 != sscanf_result || !product_size || product_size > 0xfff || !block_size || block_size > 0xfff ||
!isfinite(gflops)) {
cerr << "ill-formed input file: " << filename << endl;
cerr << "offending line:" << endl << line << endl;
exit(1);
@ -150,16 +125,8 @@ struct inputfile_t
unsigned int product_size;
float gflops;
int bk, bm, bn;
int sscanf_result =
sscanf(line.c_str(), "%x default(%d, %d, %d) %f",
&product_size,
&bk, &bm, &bn,
&gflops);
if (5 != sscanf_result ||
!product_size ||
product_size > 0xfff ||
!isfinite(gflops))
{
int sscanf_result = sscanf(line.c_str(), "%x default(%d, %d, %d) %f", &product_size, &bk, &bm, &bn, &gflops);
if (5 != sscanf_result || !product_size || product_size > 0xfff || !isfinite(gflops)) {
cerr << "ill-formed input file: " << filename << endl;
cerr << "offending line:" << endl << line << endl;
exit(1);
@ -175,7 +142,7 @@ struct inputfile_t
entries.push_back(entry);
break;
}
default:
break;
}
@ -192,27 +159,22 @@ struct inputfile_t
}
};
struct preprocessed_inputfile_entry_t
{
// One record of a preprocessed input file, as stored in preprocessed_inputfile_t::entries.
struct preprocessed_inputfile_entry_t {
// Product size code; entries of different files are matched on this value.
uint16_t product_size;
// Block size code; entries of different files are matched on this value as well.
uint16_t block_size;
// NOTE(review): presumably this entry's gflops relative to the best gflops measured for
// the same product size — confirm against import_input_file_range_one_product_size.
float efficiency;
};
bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2)
{
// Strict ordering predicate: true when e1's efficiency is strictly below e2's.
bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2) {
  const float first = e1.efficiency;
  const float second = e2.efficiency;
  return second > first;
}
struct preprocessed_inputfile_t
{
struct preprocessed_inputfile_t {
string filename;
vector<preprocessed_inputfile_entry_t> entries;
preprocessed_inputfile_t(const inputfile_t& inputfile)
: filename(inputfile.filename)
{
preprocessed_inputfile_t(const inputfile_t& inputfile) : filename(inputfile.filename) {
if (inputfile.type != inputfile_t::type_t::all_pot_sizes) {
abort();
}
@ -220,20 +182,16 @@ struct preprocessed_inputfile_t
auto it_first_with_given_product_size = it;
while (it != inputfile.entries.end()) {
++it;
if (it == inputfile.entries.end() ||
it->product_size != it_first_with_given_product_size->product_size)
{
if (it == inputfile.entries.end() || it->product_size != it_first_with_given_product_size->product_size) {
import_input_file_range_one_product_size(it_first_with_given_product_size, it);
it_first_with_given_product_size = it;
}
}
}
private:
void import_input_file_range_one_product_size(
const vector<inputfile_entry_t>::const_iterator& begin,
const vector<inputfile_entry_t>::const_iterator& end)
{
private:
void import_input_file_range_one_product_size(const vector<inputfile_entry_t>::const_iterator& begin,
const vector<inputfile_entry_t>::const_iterator& end) {
uint16_t product_size = begin->product_size;
float max_gflops = 0.0f;
for (auto it = begin; it != end; ++it) {
@ -254,9 +212,7 @@ private:
}
};
void check_all_files_in_same_exact_order(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles)
{
void check_all_files_in_same_exact_order(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles) {
if (preprocessed_inputfiles.empty()) {
return;
}
@ -266,11 +222,8 @@ void check_all_files_in_same_exact_order(
for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) {
if (preprocessed_inputfiles[i].entries.size() != num_entries) {
cerr << "these files have different number of entries: "
<< preprocessed_inputfiles[i].filename
<< " and "
<< first_file.filename
<< endl;
cerr << "these files have different number of entries: " << preprocessed_inputfiles[i].filename << " and "
<< first_file.filename << endl;
exit(1);
}
}
@ -281,12 +234,8 @@ void check_all_files_in_same_exact_order(
for (size_t file_index = 0; file_index < preprocessed_inputfiles.size(); file_index++) {
const preprocessed_inputfile_t& cur_file = preprocessed_inputfiles[file_index];
if (cur_file.entries[entry_index].product_size != entry_product_size ||
cur_file.entries[entry_index].block_size != entry_block_size)
{
cerr << "entries not in same order between these files: "
<< first_file.filename
<< " and "
<< cur_file.filename
cur_file.entries[entry_index].block_size != entry_block_size) {
cerr << "entries not in same order between these files: " << first_file.filename << " and " << cur_file.filename
<< endl;
exit(1);
}
@ -294,10 +243,8 @@ void check_all_files_in_same_exact_order(
}
}
float efficiency_of_subset(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<size_t>& subset)
{
float efficiency_of_subset(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<size_t>& subset) {
if (subset.size() <= 1) {
return 1.0f;
}
@ -309,9 +256,7 @@ float efficiency_of_subset(
uint16_t product_size = first_file.entries[0].product_size;
while (entry_index < num_entries) {
++entry_index;
if (entry_index == num_entries ||
first_file.entries[entry_index].product_size != product_size)
{
if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) {
float efficiency_this_product_size = 0.0f;
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
float efficiency_this_entry = 1.0f;
@ -331,10 +276,8 @@ float efficiency_of_subset(
return efficiency;
}
void dump_table_for_subset(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<size_t>& subset)
{
void dump_table_for_subset(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<size_t>& subset) {
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
const size_t num_entries = first_file.entries.size();
size_t entry_index = 0;
@ -359,9 +302,7 @@ void dump_table_for_subset(
cout << " static const unsigned short data[" << TableSize << "] = {";
while (entry_index < num_entries) {
++entry_index;
if (entry_index == num_entries ||
first_file.entries[entry_index].product_size != product_size)
{
if (entry_index == num_entries || first_file.entries[entry_index].product_size != product_size) {
float best_efficiency_this_product_size = 0.0f;
uint16_t best_block_size_this_product_size = 0;
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
@ -397,10 +338,8 @@ void dump_table_for_subset(
cout << "};" << endl;
}
float efficiency_of_partition(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<vector<size_t>>& partition)
{
float efficiency_of_partition(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<vector<size_t>>& partition) {
float efficiency = 1.0f;
for (auto s = partition.begin(); s != partition.end(); ++s) {
efficiency = min(efficiency, efficiency_of_subset(preprocessed_inputfiles, *s));
@ -408,8 +347,7 @@ float efficiency_of_partition(
return efficiency;
}
void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t set_size)
{
void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t set_size) {
assert(subset_size >= 1 && subset_size <= set_size);
out_subset.resize(subset_size);
for (size_t i = 0; i < subset_size; i++) {
@ -417,13 +355,9 @@ void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t se
}
}
bool is_last_subset(const vector<size_t>& subset, size_t set_size)
{
return subset[0] == set_size - subset.size();
}
// Reports whether `subset` is the final one for a set of `set_size` elements:
// that is the case when its first element equals set_size - subset.size().
// NOTE(review): assumes `subset` is non-empty and holds ascending indices — confirm with callers.
bool is_last_subset(const vector<size_t>& subset, size_t set_size) {
  const size_t first_index_of_last_subset = set_size - subset.size();
  return subset.front() == first_index_of_last_subset;
}
void next_subset(vector<size_t>& inout_subset, size_t set_size)
{
void next_subset(vector<size_t>& inout_subset, size_t set_size) {
if (is_last_subset(inout_subset, set_size)) {
cerr << "iterating past the last subset" << endl;
abort();
@ -444,9 +378,8 @@ void next_subset(vector<size_t>& inout_subset, size_t set_size)
const size_t number_of_subsets_limit = 100;
const size_t always_search_subsets_of_size_at_least = 2;
bool is_number_of_subsets_feasible(size_t n, size_t p)
{
assert(n>0 && p>0 && p<=n);
bool is_number_of_subsets_feasible(size_t n, size_t p) {
assert(n > 0 && p > 0 && p <= n);
uint64_t numerator = 1, denominator = 1;
for (size_t i = 0; i < p; i++) {
numerator *= n - i;
@ -458,24 +391,20 @@ bool is_number_of_subsets_feasible(size_t n, size_t p)
return true;
}
size_t max_feasible_subset_size(size_t n)
{
size_t max_feasible_subset_size(size_t n) {
assert(n > 0);
const size_t minresult = min<size_t>(n-1, always_search_subsets_of_size_at_least);
const size_t minresult = min<size_t>(n - 1, always_search_subsets_of_size_at_least);
for (size_t p = 1; p <= n - 1; p++) {
if (!is_number_of_subsets_feasible(n, p+1)) {
if (!is_number_of_subsets_feasible(n, p + 1)) {
return max(p, minresult);
}
}
return n - 1;
}
void find_subset_with_efficiency_higher_than(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
float required_efficiency_to_beat,
vector<size_t>& inout_remainder,
vector<size_t>& out_subset)
{
void find_subset_with_efficiency_higher_than(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
float required_efficiency_to_beat, vector<size_t>& inout_remainder,
vector<size_t>& out_subset) {
out_subset.resize(0);
if (required_efficiency_to_beat >= 1.0f) {
@ -484,7 +413,6 @@ void find_subset_with_efficiency_higher_than(
}
while (!inout_remainder.empty()) {
vector<size_t> candidate_indices(inout_remainder.size());
for (size_t i = 0; i < candidate_indices.size(); i++) {
candidate_indices[i] = i;
@ -493,20 +421,17 @@ void find_subset_with_efficiency_higher_than(
size_t candidate_indices_subset_size = max_feasible_subset_size(candidate_indices.size());
while (candidate_indices_subset_size >= 1) {
vector<size_t> candidate_indices_subset;
make_first_subset(candidate_indices_subset_size,
candidate_indices_subset,
candidate_indices.size());
make_first_subset(candidate_indices_subset_size, candidate_indices_subset, candidate_indices.size());
vector<size_t> best_candidate_indices_subset;
float best_efficiency = 0.0f;
vector<size_t> trial_subset = out_subset;
trial_subset.resize(out_subset.size() + candidate_indices_subset_size);
while (true)
{
while (true) {
for (size_t i = 0; i < candidate_indices_subset_size; i++) {
trial_subset[out_subset.size() + i] = inout_remainder[candidate_indices_subset[i]];
}
float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset);
if (trial_efficiency > best_efficiency) {
best_efficiency = trial_efficiency;
@ -517,7 +442,7 @@ void find_subset_with_efficiency_higher_than(
}
next_subset(candidate_indices_subset, candidate_indices.size());
}
if (best_efficiency > required_efficiency_to_beat) {
for (size_t i = 0; i < best_candidate_indices_subset.size(); i++) {
candidate_indices[i] = candidate_indices[best_candidate_indices_subset[i]];
@ -526,7 +451,7 @@ void find_subset_with_efficiency_higher_than(
}
candidate_indices_subset_size--;
}
size_t candidate_index = candidate_indices[0];
auto candidate_iterator = inout_remainder.begin() + candidate_index;
vector<size_t> trial_subset = out_subset;
@ -542,11 +467,9 @@ void find_subset_with_efficiency_higher_than(
}
}
void find_partition_with_efficiency_higher_than(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
float required_efficiency_to_beat,
vector<vector<size_t>>& out_partition)
{
void find_partition_with_efficiency_higher_than(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
float required_efficiency_to_beat,
vector<vector<size_t>>& out_partition) {
out_partition.resize(0);
vector<size_t> remainder;
@ -556,25 +479,19 @@ void find_partition_with_efficiency_higher_than(
while (!remainder.empty()) {
vector<size_t> new_subset;
find_subset_with_efficiency_higher_than(
preprocessed_inputfiles,
required_efficiency_to_beat,
remainder,
new_subset);
find_subset_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, remainder,
new_subset);
out_partition.push_back(new_subset);
}
}
void print_partition(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<vector<size_t>>& partition)
{
void print_partition(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<vector<size_t>>& partition) {
float efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl;
cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl;
for (auto subset = partition.begin(); subset != partition.end(); ++subset) {
cout << " Subset " << (subset - partition.begin())
<< ", efficiency " << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:"
<< endl;
cout << " Subset " << (subset - partition.begin()) << ", efficiency "
<< efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:" << endl;
for (auto file = subset->begin(); file != subset->end(); ++file) {
cout << " " << preprocessed_inputfiles[*file].filename << endl;
}
@ -586,18 +503,18 @@ void print_partition(
cout << endl;
}
struct action_t
{
virtual const char* invokation_name() const { abort(); return nullptr; }
// Base type for the command-line actions of this tool. Derived actions override
// invokation_name() and run(); the base implementations abort() when called.
struct action_t {
// Name used to select this action. The base version aborts.
virtual const char* invokation_name() const {
abort();
// Unreachable after abort(); present so the function has a return statement.
return nullptr;
}
// Executes the action on the given list of strings (input file names in the
// derived actions). The base version aborts.
virtual void run(const vector<string>&) const { abort(); }
// Virtual destructor so derived actions can be destroyed through an action_t pointer.
virtual ~action_t() {}
};
struct partition_action_t : action_t
{
struct partition_action_t : action_t {
virtual const char* invokation_name() const override { return "partition"; }
virtual void run(const vector<string>& input_filenames) const override
{
virtual void run(const vector<string>& input_filenames) const override {
vector<preprocessed_inputfile_t> preprocessed_inputfiles;
if (input_filenames.empty()) {
@ -627,17 +544,12 @@ struct partition_action_t : action_t
float required_efficiency_to_beat = 0.0f;
vector<vector<vector<size_t>>> partitions;
cerr << "searching for partitions...\r" << flush;
while (true)
{
while (true) {
vector<vector<size_t>> partition;
find_partition_with_efficiency_higher_than(
preprocessed_inputfiles,
required_efficiency_to_beat,
partition);
find_partition_with_efficiency_higher_than(preprocessed_inputfiles, required_efficiency_to_beat, partition);
float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size()
<< " subsets for " << 100.0f * actual_efficiency
<< " % efficiency"
cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() << " subsets for "
<< 100.0f * actual_efficiency << " % efficiency"
<< " \r" << flush;
partitions.push_back(partition);
if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) {
@ -649,7 +561,7 @@ struct partition_action_t : action_t
while (true) {
bool repeat = false;
for (size_t i = 0; i < partitions.size() - 1; i++) {
if (partitions[i].size() >= partitions[i+1].size()) {
if (partitions[i].size() >= partitions[i + 1].size()) {
partitions.erase(partitions.begin() + i);
repeat = true;
break;
@ -665,8 +577,7 @@ struct partition_action_t : action_t
}
};
struct evaluate_defaults_action_t : action_t
{
struct evaluate_defaults_action_t : action_t {
struct results_entry_t {
uint16_t product_size;
size_triple_t default_block_size;
@ -675,30 +586,24 @@ struct evaluate_defaults_action_t : action_t
float best_pot_gflops;
float default_efficiency;
};
// Writes one results entry to `s` on a single line: the product size, the default block
// size with its measured GFlop/s and its efficiency (printed as a percentage), followed by
// the best POT block size and its GFlop/s. The stream is switched to decimal (`dec`) at the
// end, and the same stream is returned to allow chaining.
friend ostream& operator<<(ostream& s, const results_entry_t& entry) {
  s << "Product size " << size_triple_t(entry.product_size);
  s << ": default block size " << entry.default_block_size << " -> " << entry.default_gflops
    << " GFlop/s = " << entry.default_efficiency * 100.0f << " %";
  s << " of best POT block size " << size_triple_t(entry.best_pot_block_size) << " -> "
    << entry.best_pot_gflops << " GFlop/s";
  return s << dec;
}
// Comparison predicate handed to sort(): true when e1 has a strictly lower default
// efficiency than e2, so sorted ranges start with the worst entries.
static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
  const bool e1_is_worse = e1.default_efficiency < e2.default_efficiency;
  return e1_is_worse;
}
// Command-line name of this action; it is also printed in the action's usage message.
const char* invokation_name() const override {
  return "evaluate-defaults";
}
void show_usage_and_exit() const
{
void show_usage_and_exit() const {
cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
cerr << "checks how well the performance with default sizes compares to the best "
<< "performance measured over all POT sizes." << endl;
exit(1);
}
virtual void run(const vector<string>& input_filenames) const override
{
virtual void run(const vector<string>& input_filenames) const override {
if (input_filenames.size() != 2) {
show_usage_and_exit();
}
@ -714,20 +619,17 @@ struct evaluate_defaults_action_t : action_t
}
vector<results_entry_t> results;
vector<results_entry_t> cubic_results;
uint16_t product_size = 0;
auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin();
for (auto it_default_sizes = inputfile_default_sizes.entries.begin();
it_default_sizes != inputfile_default_sizes.entries.end();
++it_default_sizes)
{
it_default_sizes != inputfile_default_sizes.entries.end(); ++it_default_sizes) {
if (it_default_sizes->product_size == product_size) {
continue;
}
product_size = it_default_sizes->product_size;
while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() &&
it_all_pot_sizes->product_size != product_size)
{
it_all_pot_sizes->product_size != product_size) {
++it_all_pot_sizes;
}
if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) {
@ -735,10 +637,8 @@ struct evaluate_defaults_action_t : action_t
}
uint16_t best_pot_block_size = 0;
float best_pot_gflops = 0;
for (auto it = it_all_pot_sizes;
it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
++it)
{
for (auto it = it_all_pot_sizes; it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
++it) {
if (it->gflops > best_pot_gflops) {
best_pot_gflops = it->gflops;
best_pot_block_size = it->pot_block_size;
@ -766,7 +666,7 @@ struct evaluate_defaults_action_t : action_t
cout << endl;
sort(results.begin(), results.end(), lower_efficiency);
const size_t n = min<size_t>(20, results.size());
cout << n << " worst results:" << endl;
for (size_t i = 0; i < n; i++) {
@ -781,34 +681,30 @@ struct evaluate_defaults_action_t : action_t
cout << endl;
sort(cubic_results.begin(), cubic_results.end(), lower_efficiency);
cout.precision(2);
vector<float> a = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f};
for (auto it = a.begin(); it != a.end(); ++it) {
size_t n = min(results.size() - 1, size_t(*it * results.size()));
cout << (100.0f * n / (results.size() - 1))
<< " % of product sizes have default efficiency <= "
<< 100.0f * results[n].default_efficiency << " %" << endl;
<< " % of product sizes have default efficiency <= " << 100.0f * results[n].default_efficiency << " %"
<< endl;
}
cout.precision(default_precision);
}
};
void show_usage_and_exit(int argc, char* argv[],
const vector<unique_ptr<action_t>>& available_actions)
{
void show_usage_and_exit(int argc, char* argv[], const vector<unique_ptr<action_t>>& available_actions) {
cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
cerr << "available actions:" << endl;
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
cerr << " " << (*it)->invokation_name() << endl;
}
}
cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl;
exit(1);
}
int main(int argc, char* argv[])
{
int main(int argc, char* argv[]) {
cout.precision(default_precision);
cerr.precision(default_precision);

View File

@ -3,32 +3,31 @@
#include "BenchUtil.h"
#include "basicbenchmark.h"
int main(int argc, char *argv[])
{
int main(int argc, char *argv[]) {
DISABLE_SSE_EXCEPTIONS();
// this is the list of matrix type and size we want to bench:
// ((suffix) (matrix size) (number of iterations))
#define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000))
// #define MODES ((Xd)(20)(10000))
// this is the list of matrix type and size we want to bench:
// ((suffix) (matrix size) (number of iterations))
#define MODES ((3d)(3)(4000000))((4d)(4)(1000000))((Xd)(4)(1000000))((Xd)(20)(10000))
// #define MODES ((Xd)(20)(10000))
#define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / "
#define _GENERATE_HEADER(R, ARG, EL) \
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) \
<< "-" \
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / "
std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl;
std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES) << endl;
const int tries = 10;
#define _RUN_BENCH(R,ARG,EL) \
std::cout << ARG( \
BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\
BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \
<< " ";
#define _RUN_BENCH(R, ARG, EL) \
std::cout << ARG(BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL))(BOOST_PP_SEQ_ELEM(1, EL), BOOST_PP_SEQ_ELEM(1, EL)), \
BOOST_PP_SEQ_ELEM(2, EL), tries) \
<< " ";
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES );
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES);
std::cout << endl;
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES );
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES);
std::cout << endl;
return 0;

View File

@ -2,55 +2,46 @@
#ifndef EIGEN_BENCH_BASICBENCH_H
#define EIGEN_BENCH_BASICBENCH_H
// Evaluation modes used as the `Mode` template argument of benchBasic_loop / benchBasic.
enum { LazyEval, EarlyEval, OmpEval };
template<int Mode, typename MatrixType>
template <int Mode, typename MatrixType>
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline));
template<int Mode, typename MatrixType>
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations)
{
for(int a = 0; a < iterations; a++)
{
if (Mode==LazyEval)
{
template <int Mode, typename MatrixType>
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) {
for (int a = 0; a < iterations; a++) {
if (Mode == LazyEval) {
asm("#begin_bench_loop LazyEval");
if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
}
else if (Mode==OmpEval)
{
} else if (Mode == OmpEval) {
asm("#begin_bench_loop OmpEval");
if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
m = (I + 0.00005 * (m + m.lazyProduct(m))).eval();
}
else
{
} else {
asm("#begin_bench_loop EarlyEval");
if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
if (MatrixType::SizeAtCompileTime != Eigen::Dynamic) asm("#fixedsize");
m = I + 0.00005 * (m + m * m);
}
asm("#end_bench_loop");
}
}
template<int Mode, typename MatrixType>
template <int Mode, typename MatrixType>
double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline));
template<int Mode, typename MatrixType>
double benchBasic(const MatrixType& mat, int iterations, int tries)
{
template <int Mode, typename MatrixType>
double benchBasic(const MatrixType& mat, int iterations, int tries) {
const int rows = mat.rows();
const int cols = mat.cols();
MatrixType I(rows,cols);
MatrixType m(rows,cols);
MatrixType I(rows, cols);
MatrixType m(rows, cols);
initMatrix_identity(I);
Eigen::BenchTimer timer;
for(uint t=0; t<tries; ++t)
{
for (uint t = 0; t < tries; ++t) {
initMatrix_random(m);
timer.start();
benchBasic_loop<Mode>(I, m, iterations);
@ -60,4 +51,4 @@ double benchBasic(const MatrixType& mat, int iterations, int tries)
return timer.value();
};
#endif // EIGEN_BENCH_BASICBENCH_H
#endif // EIGEN_BENCH_BASICBENCH_H

View File

@ -25,59 +25,47 @@ typedef double Scalar;
#define CBLAS_GEMM cblas_dgemm
#endif
// Dynamic-size dense matrix of the benchmark's Scalar type; the operand and result type of
// the GEMM benchmark and of the product checks in this file.
typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MyMatrix;
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
void check_product(int M, int N, int K);
void check_product(void);
int main(int argc, char *argv[])
{
// disable SSE exceptions
#ifdef __GNUC__
int main(int argc, char* argv[]) {
// disable SSE exceptions
#ifdef __GNUC__
{
int aux;
asm(
"stmxcsr %[aux] \n\t"
"orl $32832, %[aux] \n\t"
"ldmxcsr %[aux] \n\t"
: : [aux] "m" (aux));
asm("stmxcsr %[aux] \n\t"
"orl $32832, %[aux] \n\t"
"ldmxcsr %[aux] \n\t"
:
: [aux] "m"(aux));
}
#endif
#endif
int nbtries=1, nbloops=1, M, N, K;
int nbtries = 1, nbloops = 1, M, N, K;
if (argc==2)
{
if (std::string(argv[1])=="check")
if (argc == 2) {
if (std::string(argv[1]) == "check")
check_product();
else
M = N = K = atoi(argv[1]);
}
else if ((argc==3) && (std::string(argv[1])=="auto"))
{
} else if ((argc == 3) && (std::string(argv[1]) == "auto")) {
M = N = K = atoi(argv[2]);
nbloops = 1000000000/(M*M*M);
if (nbloops<1)
nbloops = 1;
nbloops = 1000000000 / (M * M * M);
if (nbloops < 1) nbloops = 1;
nbtries = 6;
}
else if (argc==4)
{
} else if (argc == 4) {
M = N = K = atoi(argv[1]);
nbloops = atoi(argv[2]);
nbtries = atoi(argv[3]);
}
else if (argc==6)
{
} else if (argc == 6) {
M = atoi(argv[1]);
N = atoi(argv[2]);
K = atoi(argv[3]);
nbloops = atoi(argv[4]);
nbtries = atoi(argv[5]);
}
else
{
} else {
std::cout << "Usage: " << argv[0] << " size \n";
std::cout << "Usage: " << argv[0] << " auto size\n";
std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
@ -95,14 +83,13 @@ int main(int argc, char *argv[])
double nbmad = double(M) * double(N) * double(K) * double(nbloops);
if (!(std::string(argv[1])=="auto"))
std::cout << M << " x " << N << " x " << K << "\n";
if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n";
Scalar alpha, beta;
MyMatrix ma(M,K), mb(K,N), mc(M,N);
ma = MyMatrix::Random(M,K);
mb = MyMatrix::Random(K,N);
mc = MyMatrix::Random(M,N);
MyMatrix ma(M, K), mb(K, N), mc(M, N);
ma = MyMatrix::Random(M, K);
mb = MyMatrix::Random(K, N);
mc = MyMatrix::Random(M, N);
Eigen::BenchTimer timer;
@ -112,108 +99,101 @@ int main(int argc, char *argv[])
// bench cblas
// ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
if (!(std::string(argv[1])=="auto"))
{
if (!(std::string(argv[1]) == "auto")) {
timer.reset();
for (uint k=0 ; k<nbtries ; ++k)
{
timer.start();
for (uint j=0 ; j<nbloops ; ++j)
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
#else
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
#endif
timer.stop();
for (uint k = 0; k < nbtries; ++k) {
timer.start();
for (uint j = 0; j < nbloops; ++j)
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta,
mc.data(), N);
#else
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta,
mc.data(), M);
#endif
timer.stop();
}
if (!(std::string(argv[1])=="auto"))
std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
if (!(std::string(argv[1]) == "auto"))
std::cout << "cblas: " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
else
std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
}
// clear
ma = MyMatrix::Random(M,K);
mb = MyMatrix::Random(K,N);
mc = MyMatrix::Random(M,N);
ma = MyMatrix::Random(M, K);
mb = MyMatrix::Random(K, N);
mc = MyMatrix::Random(M, N);
// eigen
// if (!(std::string(argv[1])=="auto"))
// if (!(std::string(argv[1])=="auto"))
{
timer.reset();
for (uint k=0 ; k<nbtries ; ++k)
{
timer.start();
bench_eigengemm(mc, ma, mb, nbloops);
timer.stop();
}
if (!(std::string(argv[1])=="auto"))
std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
else
std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
timer.reset();
for (uint k = 0; k < nbtries; ++k) {
timer.start();
bench_eigengemm(mc, ma, mb, nbloops);
timer.stop();
}
if (!(std::string(argv[1]) == "auto"))
std::cout << "eigen : " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
else
std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
}
std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
return 0;
}
using namespace Eigen;
// Accumulates the product ma * mb into mc, `nbloops` times in a row (mc += ma * mb).
// The assignment goes through noalias(), i.e. the caller guarantees that mc does not alias
// ma or mb. A non-positive `nbloops` performs no work.
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) {
  // Signed counter on purpose: comparing an unsigned counter against the int bound would
  // convert a negative `nbloops` into a huge iteration count.
  for (int j = 0; j < nbloops; ++j) mc.noalias() += ma * mb;
}
// MYVERIFY(A, M): when condition A is false, prints "FAIL: " followed by M and a newline to
// std::cout; does nothing when A is true. M may be anything streamable into std::cout.
//
// The body is wrapped in do { ... } while (0) so the macro expands to exactly one statement
// and must be terminated by the caller's ';'. This keeps it safe inside unbraced if/else.
#define MYVERIFY(A, M)                    \
  do {                                    \
    if (!(A)) {                           \
      std::cout << "FAIL: " << M << "\n"; \
    }                                     \
  } while (0)
void check_product(int M, int N, int K)
{
MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
ma = MyMatrix::Random(M,K);
mb = MyMatrix::Random(K,N);
void check_product(int M, int N, int K) {
MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N), mref(M, N);
ma = MyMatrix::Random(M, K);
mb = MyMatrix::Random(K, N);
maT = ma.transpose();
mbT = mb.transpose();
mc = MyMatrix::Random(M,N);
mc = MyMatrix::Random(M, N);
MyMatrix::Scalar eps = 1e-4;
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
meigen += ma * mb;
MYVERIFY(meigen.isApprox(mref, eps),". * .");
MYVERIFY(meigen.isApprox(mref, eps), ". * .");
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
meigen += maT.transpose() * mb;
MYVERIFY(meigen.isApprox(mref, eps),"T * .");
MYVERIFY(meigen.isApprox(mref, eps), "T * .");
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
meigen += (maT.transpose()) * (mbT.transpose());
MYVERIFY(meigen.isApprox(mref, eps),"T * T");
MYVERIFY(meigen.isApprox(mref, eps), "T * T");
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
meigen += ma * mbT.transpose();
MYVERIFY(meigen.isApprox(mref, eps),". * T");
MYVERIFY(meigen.isApprox(mref, eps), ". * T");
}
void check_product(void)
{
void check_product(void) {
int M, N, K;
for (uint i=0; i<1000; ++i)
{
M = internal::random<int>(1,64);
N = internal::random<int>(1,768);
K = internal::random<int>(1,768);
for (uint i = 0; i < 1000; ++i) {
M = internal::random<int>(1, 64);
N = internal::random<int>(1, 768);
K = internal::random<int>(1, 768);
M = (0 + M) * 1;
std::cout << M << " x " << N << " x " << K << "\n";
check_product(M, N, K);
}
}

View File

@ -25,117 +25,100 @@ using namespace Eigen;
typedef float Scalar;
template <typename MatrixType>
__attribute__ ((noinline)) void benchLLT(const MatrixType& m)
{
__attribute__((noinline)) void benchLLT(const MatrixType& m) {
int rows = m.rows();
int cols = m.cols();
double cost = 0;
for (int j=0; j<rows; ++j)
{
int r = std::max(rows - j -1,0);
cost += 2*(r*j+r+j);
for (int j = 0; j < rows; ++j) {
int r = std::max(rows - j - 1, 0);
cost += 2 * (r * j + r + j);
}
int repeats = (REPEAT*1000)/(rows*rows);
int repeats = (REPEAT * 1000) / (rows * rows);
typedef typename MatrixType::Scalar Scalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
MatrixType a = MatrixType::Random(rows,cols);
SquareMatrixType covMat = a * a.adjoint();
MatrixType a = MatrixType::Random(rows, cols);
SquareMatrixType covMat = a * a.adjoint();
BenchTimer timerNoSqrt, timerSqrt;
Scalar acc = 0;
int r = internal::random<int>(0,covMat.rows()-1);
int c = internal::random<int>(0,covMat.cols()-1);
for (int t=0; t<TRIES; ++t)
{
int r = internal::random<int>(0, covMat.rows() - 1);
int c = internal::random<int>(0, covMat.cols() - 1);
for (int t = 0; t < TRIES; ++t) {
timerNoSqrt.start();
for (int k=0; k<repeats; ++k)
{
for (int k = 0; k < repeats; ++k) {
LDLT<SquareMatrixType> cholnosqrt(covMat);
acc += cholnosqrt.matrixL().coeff(r,c);
acc += cholnosqrt.matrixL().coeff(r, c);
}
timerNoSqrt.stop();
}
for (int t=0; t<TRIES; ++t)
{
for (int t = 0; t < TRIES; ++t) {
timerSqrt.start();
for (int k=0; k<repeats; ++k)
{
for (int k = 0; k < repeats; ++k) {
LLT<SquareMatrixType> chol(covMat);
acc += chol.matrixL().coeff(r,c);
acc += chol.matrixL().coeff(r, c);
}
timerSqrt.stop();
}
if (MatrixType::RowsAtCompileTime==Dynamic)
if (MatrixType::RowsAtCompileTime == Dynamic)
std::cout << "dyn ";
else
std::cout << "fixed ";
std::cout << covMat.rows() << " \t"
<< (timerNoSqrt.best()) / repeats << "s "
<< "(" << 1e-9 * cost*repeats/timerNoSqrt.best() << " GFLOPS)\t"
<< (timerSqrt.best()) / repeats << "s "
<< "(" << 1e-9 * cost*repeats/timerSqrt.best() << " GFLOPS)\n";
std::cout << covMat.rows() << " \t" << (timerNoSqrt.best()) / repeats << "s "
<< "(" << 1e-9 * cost * repeats / timerNoSqrt.best() << " GFLOPS)\t" << (timerSqrt.best()) / repeats << "s "
<< "(" << 1e-9 * cost * repeats / timerSqrt.best() << " GFLOPS)\n";
#ifdef BENCH_GSL
if (MatrixType::RowsAtCompileTime==Dynamic)
{
#ifdef BENCH_GSL
if (MatrixType::RowsAtCompileTime == Dynamic) {
timerSqrt.reset();
gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols());
gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols());
eiToGsl(covMat, &gslCovMat);
for (int t=0; t<TRIES; ++t)
{
for (int t = 0; t < TRIES; ++t) {
timerSqrt.start();
for (int k=0; k<repeats; ++k)
{
gsl_matrix_memcpy(gslCopy,gslCovMat);
for (int k = 0; k < repeats; ++k) {
gsl_matrix_memcpy(gslCopy, gslCovMat);
gsl_linalg_cholesky_decomp(gslCopy);
acc += gsl_matrix_get(gslCopy,r,c);
acc += gsl_matrix_get(gslCopy, r, c);
}
timerSqrt.stop();
}
std::cout << " | \t"
<< timerSqrt.value() * REPEAT / repeats << "s";
std::cout << " | \t" << timerSqrt.value() * REPEAT / repeats << "s";
gsl_matrix_free(gslCovMat);
}
#endif
#endif
std::cout << "\n";
// make sure the compiler does not optimize too much
if (acc==123)
std::cout << acc;
if (acc == 123) std::cout << acc;
}
int main(int argc, char* argv[])
{
const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,1500,0};
int main(int argc, char* argv[]) {
const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 1500, 0};
std::cout << "size LDLT LLT";
// #ifdef BENCH_GSL
// std::cout << " GSL (standard + double + ATLAS) ";
// #endif
// #ifdef BENCH_GSL
// std::cout << " GSL (standard + double + ATLAS) ";
// #endif
std::cout << "\n";
for (int i=0; dynsizes[i]>0; ++i)
benchLLT(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
for (int i = 0; dynsizes[i] > 0; ++i) benchLLT(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
benchLLT(Matrix<Scalar,2,2>());
benchLLT(Matrix<Scalar,3,3>());
benchLLT(Matrix<Scalar,4,4>());
benchLLT(Matrix<Scalar,5,5>());
benchLLT(Matrix<Scalar,6,6>());
benchLLT(Matrix<Scalar,7,7>());
benchLLT(Matrix<Scalar,8,8>());
benchLLT(Matrix<Scalar,12,12>());
benchLLT(Matrix<Scalar,16,16>());
benchLLT(Matrix<Scalar, 2, 2>());
benchLLT(Matrix<Scalar, 3, 3>());
benchLLT(Matrix<Scalar, 4, 4>());
benchLLT(Matrix<Scalar, 5, 5>());
benchLLT(Matrix<Scalar, 6, 6>());
benchLLT(Matrix<Scalar, 7, 7>());
benchLLT(Matrix<Scalar, 8, 8>());
benchLLT(Matrix<Scalar, 12, 12>());
benchLLT(Matrix<Scalar, 16, 16>());
return 0;
}

View File

@ -31,34 +31,31 @@ using namespace Eigen;
typedef SCALAR Scalar;
template <typename MatrixType>
__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
{
__attribute__((noinline)) void benchEigenSolver(const MatrixType& m) {
int rows = m.rows();
int cols = m.cols();
int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(rows))));
int stdRepeats = std::max(1, int((REPEAT * 1000) / (rows * rows * sqrt(rows))));
int saRepeats = stdRepeats * 4;
typedef typename MatrixType::Scalar Scalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
MatrixType a = MatrixType::Random(rows,cols);
SquareMatrixType covMat = a * a.adjoint();
MatrixType a = MatrixType::Random(rows, cols);
SquareMatrixType covMat = a * a.adjoint();
BenchTimer timerSa, timerStd;
Scalar acc = 0;
int r = internal::random<int>(0,covMat.rows()-1);
int c = internal::random<int>(0,covMat.cols()-1);
int r = internal::random<int>(0, covMat.rows() - 1);
int c = internal::random<int>(0, covMat.cols() - 1);
{
SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
for (int t=0; t<TRIES; ++t)
{
for (int t = 0; t < TRIES; ++t) {
timerSa.start();
for (int k=0; k<saRepeats; ++k)
{
for (int k = 0; k < saRepeats; ++k) {
ei.compute(covMat);
acc += ei.eigenvectors().coeff(r,c);
acc += ei.eigenvectors().coeff(r, c);
}
timerSa.stop();
}
@ -66,107 +63,94 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
{
EigenSolver<SquareMatrixType> ei(covMat);
for (int t=0; t<TRIES; ++t)
{
for (int t = 0; t < TRIES; ++t) {
timerStd.start();
for (int k=0; k<stdRepeats; ++k)
{
for (int k = 0; k < stdRepeats; ++k) {
ei.compute(covMat);
acc += ei.eigenvectors().coeff(r,c);
acc += ei.eigenvectors().coeff(r, c);
}
timerStd.stop();
}
}
if (MatrixType::RowsAtCompileTime==Dynamic)
if (MatrixType::RowsAtCompileTime == Dynamic)
std::cout << "dyn ";
else
std::cout << "fixed ";
std::cout << covMat.rows() << " \t"
<< timerSa.value() * REPEAT / saRepeats << "s \t"
std::cout << covMat.rows() << " \t" << timerSa.value() * REPEAT / saRepeats << "s \t"
<< timerStd.value() * REPEAT / stdRepeats << "s";
#ifdef BENCH_GMM
if (MatrixType::RowsAtCompileTime==Dynamic)
{
#ifdef BENCH_GMM
if (MatrixType::RowsAtCompileTime == Dynamic) {
timerSa.reset();
timerStd.reset();
gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(),covMat.cols());
gmm::dense_matrix<Scalar> eigvect(covMat.rows(),covMat.cols());
gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(), covMat.cols());
gmm::dense_matrix<Scalar> eigvect(covMat.rows(), covMat.cols());
std::vector<Scalar> eigval(covMat.rows());
eiToGmm(covMat, gmmCovMat);
for (int t=0; t<TRIES; ++t)
{
for (int t = 0; t < TRIES; ++t) {
timerSa.start();
for (int k=0; k<saRepeats; ++k)
{
for (int k = 0; k < saRepeats; ++k) {
gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
acc += eigvect(r,c);
acc += eigvect(r, c);
}
timerSa.stop();
}
// the non-selfadjoint solver does not compute the eigen vectors
// for (int t=0; t<TRIES; ++t)
// {
// timerStd.start();
// for (int k=0; k<stdRepeats; ++k)
// {
// gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
// acc += eigvect(r,c);
// }
// timerStd.stop();
// }
// for (int t=0; t<TRIES; ++t)
// {
// timerStd.start();
// for (int k=0; k<stdRepeats; ++k)
// {
// gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
// acc += eigvect(r,c);
// }
// timerStd.stop();
// }
std::cout << " | \t"
<< timerSa.value() * REPEAT / saRepeats << "s"
std::cout << " | \t" << timerSa.value() * REPEAT / saRepeats << "s"
<< /*timerStd.value() * REPEAT / stdRepeats << "s"*/ " na ";
}
#endif
#endif
#ifdef BENCH_GSL
if (MatrixType::RowsAtCompileTime==Dynamic)
{
#ifdef BENCH_GSL
if (MatrixType::RowsAtCompileTime == Dynamic) {
timerSa.reset();
timerStd.reset();
gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(),covMat.cols());
gsl_vector* eigval = gsl_vector_alloc(covMat.rows());
gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(), covMat.cols());
gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(), covMat.cols());
gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(), covMat.cols());
gsl_vector* eigval = gsl_vector_alloc(covMat.rows());
gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows());
gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(),covMat.cols());
gsl_vector_complex* eigvalz = gsl_vector_complex_alloc(covMat.rows());
gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(), covMat.cols());
gsl_vector_complex* eigvalz = gsl_vector_complex_alloc(covMat.rows());
gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());
eiToGsl(covMat, &gslCovMat);
for (int t=0; t<TRIES; ++t)
{
for (int t = 0; t < TRIES; ++t) {
timerSa.start();
for (int k=0; k<saRepeats; ++k)
{
gsl_matrix_memcpy(gslCopy,gslCovMat);
for (int k = 0; k < saRepeats; ++k) {
gsl_matrix_memcpy(gslCopy, gslCovMat);
gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
acc += gsl_matrix_get(eigvect,r,c);
acc += gsl_matrix_get(eigvect, r, c);
}
timerSa.stop();
}
for (int t=0; t<TRIES; ++t)
{
for (int t = 0; t < TRIES; ++t) {
timerStd.start();
for (int k=0; k<stdRepeats; ++k)
{
gsl_matrix_memcpy(gslCopy,gslCovMat);
for (int k = 0; k < stdRepeats; ++k) {
gsl_matrix_memcpy(gslCopy, gslCovMat);
gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
acc += GSL_REAL(gsl_matrix_complex_get(eigvectz,r,c));
acc += GSL_REAL(gsl_matrix_complex_get(eigvectz, r, c));
}
timerStd.stop();
}
std::cout << " | \t"
<< timerSa.value() * REPEAT / saRepeats << "s \t"
<< timerStd.value() * REPEAT / stdRepeats << "s";
std::cout << " | \t" << timerSa.value() * REPEAT / saRepeats << "s \t" << timerStd.value() * REPEAT / stdRepeats
<< "s";
gsl_matrix_free(gslCovMat);
gsl_vector_free(gslCopy);
@ -177,36 +161,32 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
gsl_eigen_symmv_free(eisymm);
gsl_eigen_nonsymmv_free(einonsymm);
}
#endif
#endif
std::cout << "\n";
// make sure the compiler does not optimize too much
if (acc==123)
std::cout << acc;
if (acc == 123) std::cout << acc;
}
int main(int argc, char* argv[])
{
const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};
int main(int argc, char* argv[]) {
const int dynsizes[] = {4, 6, 8, 12, 16, 24, 32, 64, 128, 256, 512, 0};
std::cout << "size selfadjoint generic";
#ifdef BENCH_GMM
#ifdef BENCH_GMM
std::cout << " GMM++ ";
#endif
#ifdef BENCH_GSL
#endif
#ifdef BENCH_GSL
std::cout << " GSL (double + ATLAS) ";
#endif
#endif
std::cout << "\n";
for (uint i=0; dynsizes[i]>0; ++i)
benchEigenSolver(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
for (uint i = 0; dynsizes[i] > 0; ++i) benchEigenSolver(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
benchEigenSolver(Matrix<Scalar,2,2>());
benchEigenSolver(Matrix<Scalar,3,3>());
benchEigenSolver(Matrix<Scalar,4,4>());
benchEigenSolver(Matrix<Scalar,6,6>());
benchEigenSolver(Matrix<Scalar,8,8>());
benchEigenSolver(Matrix<Scalar,12,12>());
benchEigenSolver(Matrix<Scalar,16,16>());
benchEigenSolver(Matrix<Scalar, 2, 2>());
benchEigenSolver(Matrix<Scalar, 3, 3>());
benchEigenSolver(Matrix<Scalar, 4, 4>());
benchEigenSolver(Matrix<Scalar, 6, 6>());
benchEigenSolver(Matrix<Scalar, 8, 8>());
benchEigenSolver(Matrix<Scalar, 12, 12>());
benchEigenSolver(Matrix<Scalar, 16, 16>());
return 0;
}

View File

@ -19,13 +19,21 @@
using namespace Eigen;
using namespace std;
// Returns a printable name for the scalar type T. Only the explicit specializations below
// (float, double, long double) are defined; the primary template is declared but has no
// definition.
template <typename T>
std::string nameof();

template <>
std::string nameof<float>() {
  return "float";
}

template <>
std::string nameof<double>() {
  return "double";
}

template <>
std::string nameof<long double>() {
  return "long double";
}
#ifndef TYPE
#define TYPE float
@ -41,75 +49,69 @@ template <> string nameof<long double>() {return "long double";}
using namespace Eigen;
template <typename T>
void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false)
{
typedef typename NumTraits<T>::Real Scalar;
typedef typename std::complex<Scalar> Complex;
int nits = NDATA/nfft;
vector<T> inbuf(nfft);
vector<Complex > outbuf(nfft);
FFT< Scalar > fft;
void bench(int nfft, bool fwd, bool unscaled = false, bool halfspec = false) {
typedef typename NumTraits<T>::Real Scalar;
typedef typename std::complex<Scalar> Complex;
int nits = NDATA / nfft;
vector<T> inbuf(nfft);
vector<Complex> outbuf(nfft);
FFT<Scalar> fft;
if (unscaled) {
fft.SetFlag(fft.Unscaled);
cout << "unscaled ";
}
if (halfspec) {
fft.SetFlag(fft.HalfSpectrum);
cout << "halfspec ";
}
std::fill(inbuf.begin(),inbuf.end(),0);
fft.fwd( outbuf , inbuf);
BenchTimer timer;
timer.reset();
for (int k=0;k<8;++k) {
timer.start();
if (fwd)
for(int i = 0; i < nits; i++)
fft.fwd( outbuf , inbuf);
else
for(int i = 0; i < nits; i++)
fft.inv(inbuf,outbuf);
timer.stop();
}
cout << nameof<Scalar>() << " ";
double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits );
if ( NumTraits<T>::IsComplex ) {
cout << "complex";
}else{
cout << "real ";
mflops /= 2;
}
if (unscaled) {
fft.SetFlag(fft.Unscaled);
cout << "unscaled ";
}
if (halfspec) {
fft.SetFlag(fft.HalfSpectrum);
cout << "halfspec ";
}
std::fill(inbuf.begin(), inbuf.end(), 0);
fft.fwd(outbuf, inbuf);
BenchTimer timer;
timer.reset();
for (int k = 0; k < 8; ++k) {
timer.start();
if (fwd)
cout << " fwd";
for (int i = 0; i < nits; i++) fft.fwd(outbuf, inbuf);
else
cout << " inv";
for (int i = 0; i < nits; i++) fft.inv(inbuf, outbuf);
timer.stop();
}
cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n";
cout << nameof<Scalar>() << " ";
double mflops = 5. * nfft * log2((double)nfft) / (1e6 * timer.value() / (double)nits);
if (NumTraits<T>::IsComplex) {
cout << "complex";
} else {
cout << "real ";
mflops /= 2;
}
if (fwd)
cout << " fwd";
else
cout << " inv";
cout << " NFFT=" << nfft << " " << (double(1e-6 * nfft * nits) / timer.value()) << " MS/s " << mflops << "MFLOPS\n";
}
int main(int argc,char ** argv)
{
bench<complex<float> >(NFFT,true);
bench<complex<float> >(NFFT,false);
bench<float>(NFFT,true);
bench<float>(NFFT,false);
bench<float>(NFFT,false,true);
bench<float>(NFFT,false,true,true);
int main(int argc, char** argv) {
bench<complex<float> >(NFFT, true);
bench<complex<float> >(NFFT, false);
bench<float>(NFFT, true);
bench<float>(NFFT, false);
bench<float>(NFFT, false, true);
bench<float>(NFFT, false, true, true);
bench<complex<double> >(NFFT,true);
bench<complex<double> >(NFFT,false);
bench<double>(NFFT,true);
bench<double>(NFFT,false);
bench<complex<long double> >(NFFT,true);
bench<complex<long double> >(NFFT,false);
bench<long double>(NFFT,true);
bench<long double>(NFFT,false);
return 0;
bench<complex<double> >(NFFT, true);
bench<complex<double> >(NFFT, false);
bench<double>(NFFT, true);
bench<double>(NFFT, false);
bench<complex<long double> >(NFFT, true);
bench<complex<long double> >(NFFT, false);
bench<long double>(NFFT, true);
bench<long double>(NFFT, false);
return 0;
}

View File

@ -11,124 +11,110 @@ using namespace std;
#define REPEAT 1000000
#endif
// Selects which product the `func` specializations in this file evaluate:
//   TV       - a1 * a2
//   TMATV    - a1.matrix() * a2
//   TMATVMAT - res(a1.matrix() * a2.matrix())
enum func_opt
{
TV,
TMATV,
TMATVMAT,
enum func_opt {
TV,
TMATV,
TMATVMAT,
};
// Primary template, declared only. One partial specialization exists per
// func_opt value; each provides a static run(arg1&, arg2&) returning `res`.
template <class res, class arg1, class arg2, int opt>
struct func;
// TV specialization: run() returns a1 * a2.
// The empty asm statement and EIGEN_DONT_INLINE are presumably there to keep
// the compiler from inlining/eliding the call inside the timing loop -- confirm
// against the macro's definition.
template <class res, class arg1, class arg2>
struct func<res, arg1, arg2, TV>
{
static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
{
asm ("");
return a1 * a2;
}
struct func<res, arg1, arg2, TV> {
static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
asm("");
return a1 * a2;
}
};
// TMATV specialization: run() multiplies the matrix form of a1
// (a1.matrix()) by a2 and returns the product.
// The empty asm statement presumably acts as an optimization barrier for the
// benchmark loop -- TODO confirm.
template <class res, class arg1, class arg2>
struct func<res, arg1, arg2, TMATV>
{
static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
{
asm ("");
return a1.matrix() * a2;
}
struct func<res, arg1, arg2, TMATV> {
static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
asm("");
return a1.matrix() * a2;
}
};
// TMATVMAT specialization: run() multiplies a1.matrix() by a2.matrix() and
// explicitly converts the product to `res` before returning it.
// The empty asm statement presumably acts as an optimization barrier for the
// benchmark loop -- TODO confirm.
template <class res, class arg1, class arg2>
struct func<res, arg1, arg2, TMATVMAT>
{
static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
{
asm ("");
return res(a1.matrix() * a2.matrix());
}
struct func<res, arg1, arg2, TMATVMAT> {
static EIGEN_DONT_INLINE res run(arg1& a1, arg2& a2) {
asm("");
return res(a1.matrix() * a2.matrix());
}
};
// Benchmark driver for one `func` specialization.
//   func - type providing static run(arg1&, arg2&)
//   arg1 - left operand type; must provide setIdentity()
//   arg2 - right operand type, also the type the result is stored back into
// run() sets both operands to identity, then performs 10 timed rounds of
// REPEAT calls `a2 = func::run(a1, a2)` and prints timer.value() with four
// fixed decimals followed by "s".
// NOTE(review): this span shows the same definition twice, old formatting and
// clang-formatted, interleaved; the edits below are applied to both copies.
template <class func, class arg1, class arg2>
struct test_transform
{
static void run()
{
arg1 a1;
a1.setIdentity();
arg2 a2;
a2.setIdentity();
struct test_transform {
static void run() {
arg1 a1;
a1.setIdentity();
arg2 a2;
a2.setIdentity();
BenchTimer timer;
timer.reset();
for (int k=0; k<10; ++k)
{
timer.start();
// Inner index is `i` so it does not shadow the round counter `k`.
for (int i=0; i<REPEAT; ++i)
a2 = func::run( a1, a2 );
timer.stop();
}
cout << setprecision(4) << fixed << timer.value() << "s " << endl;
BenchTimer timer;
timer.reset();
for (int k = 0; k < 10; ++k) {
timer.start();
// Inner index is `i` so it does not shadow the round counter `k`.
for (int i = 0; i < REPEAT; ++i) a2 = func::run(a1, a2);
timer.stop();
}
cout << setprecision(4) << fixed << timer.value() << "s " << endl;
}
};
// run_vec(op, scalar, mode, option, vsize)
// Prints the stringified scalar / mode / option / vsize as a row label, then
// benchmarks `Vec = func<Vec, Trans, Vec, op>::run(Trans, Vec)` where
//   Trans = Transform<scalar, 3, mode, option>
//   Vec   = Matrix<scalar, vsize, 1, option>
// The expansion is a std::cout statement followed by a separate braced block,
// so it must not be used as the sole body of an unbraced if/for.
// NOTE(review): the macro appears twice below (clang-formatted copy first,
// then the old formatting of the same definition).
#define run_vec(op, scalar, mode, option, vsize) \
std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize " "; \
{ \
typedef Transform<scalar, 3, mode, option> Trans; \
typedef Matrix<scalar, vsize, 1, option> Vec; \
typedef func<Vec, Trans, Vec, op> Func; \
test_transform<Func, Trans, Vec>::run(); \
}
#define run_vec( op, scalar, mode, option, vsize ) \
std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize " "; \
{\
typedef Transform<scalar, 3, mode, option> Trans;\
typedef Matrix<scalar, vsize, 1, option> Vec;\
typedef func<Vec,Trans,Vec,op> Func;\
test_transform< Func, Trans, Vec >::run();\
}
// run_trans(op, scalar, mode, option)
// Prints the stringified scalar / mode / option as a row label, then
// benchmarks `Trans = func<Trans, Trans, Trans, op>::run(Trans, Trans)` with
//   Trans = Transform<scalar, 3, mode, option>
// The expansion is a std::cout statement followed by a separate braced block,
// so it must not be used as the sole body of an unbraced if/for.
// NOTE(review): the macro appears twice below (clang-formatted copy first,
// then the old formatting of the same definition).
#define run_trans(op, scalar, mode, option) \
std::cout << #scalar << "\t " << #mode << "\t " << #option << " "; \
{ \
typedef Transform<scalar, 3, mode, option> Trans; \
typedef func<Trans, Trans, Trans, op> Func; \
test_transform<Func, Trans, Trans>::run(); \
}
#define run_trans( op, scalar, mode, option ) \
std::cout << #scalar << "\t " << #mode << "\t " << #option << " "; \
{\
typedef Transform<scalar, 3, mode, option> Trans;\
typedef func<Trans,Trans,Trans,op> Func;\
test_transform< Func, Trans, Trans >::run();\
}
// Entry point of the transform benchmark. Runs four groups, each introduced
// by a heading line on cout:
//   1. vec = trans * vec                          (run_vec, TV)
//   2. vec = trans.matrix() * vec                 (run_vec, TMATV)
//   3. trans = trans1 * trans                     (run_trans, TV)
//   4. trans = trans1.matrix() * trans.matrix()   (run_trans, TMATVMAT)
// Each group sweeps float/double, Isometry/Projective and AutoAlign/DontAlign
// as listed. argc/argv are not used and there is no explicit return.
// NOTE(review): this span shows the same function twice, clang-formatted and
// old formatting, interleaved group by group.
int main(int argc, char* argv[]) {
cout << "vec = trans * vec" << endl;
run_vec(TV, float, Isometry, AutoAlign, 3);
run_vec(TV, float, Isometry, DontAlign, 3);
run_vec(TV, float, Isometry, AutoAlign, 4);
run_vec(TV, float, Isometry, DontAlign, 4);
run_vec(TV, float, Projective, AutoAlign, 4);
run_vec(TV, float, Projective, DontAlign, 4);
run_vec(TV, double, Isometry, AutoAlign, 3);
run_vec(TV, double, Isometry, DontAlign, 3);
run_vec(TV, double, Isometry, AutoAlign, 4);
run_vec(TV, double, Isometry, DontAlign, 4);
run_vec(TV, double, Projective, AutoAlign, 4);
run_vec(TV, double, Projective, DontAlign, 4);
int main(int argc, char* argv[])
{
cout << "vec = trans * vec" << endl;
run_vec(TV, float, Isometry, AutoAlign, 3);
run_vec(TV, float, Isometry, DontAlign, 3);
run_vec(TV, float, Isometry, AutoAlign, 4);
run_vec(TV, float, Isometry, DontAlign, 4);
run_vec(TV, float, Projective, AutoAlign, 4);
run_vec(TV, float, Projective, DontAlign, 4);
run_vec(TV, double, Isometry, AutoAlign, 3);
run_vec(TV, double, Isometry, DontAlign, 3);
run_vec(TV, double, Isometry, AutoAlign, 4);
run_vec(TV, double, Isometry, DontAlign, 4);
run_vec(TV, double, Projective, AutoAlign, 4);
run_vec(TV, double, Projective, DontAlign, 4);
cout << "vec = trans.matrix() * vec" << endl;
run_vec(TMATV, float, Isometry, AutoAlign, 4);
run_vec(TMATV, float, Isometry, DontAlign, 4);
run_vec(TMATV, double, Isometry, AutoAlign, 4);
run_vec(TMATV, double, Isometry, DontAlign, 4);
cout << "vec = trans.matrix() * vec" << endl;
run_vec(TMATV, float, Isometry, AutoAlign, 4);
run_vec(TMATV, float, Isometry, DontAlign, 4);
run_vec(TMATV, double, Isometry, AutoAlign, 4);
run_vec(TMATV, double, Isometry, DontAlign, 4);
cout << "trans = trans1 * trans" << endl;
run_trans(TV, float, Isometry, AutoAlign);
run_trans(TV, float, Isometry, DontAlign);
run_trans(TV, double, Isometry, AutoAlign);
run_trans(TV, double, Isometry, DontAlign);
run_trans(TV, float, Projective, AutoAlign);
run_trans(TV, float, Projective, DontAlign);
run_trans(TV, double, Projective, AutoAlign);
run_trans(TV, double, Projective, DontAlign);
cout << "trans = trans1 * trans" << endl;
run_trans(TV, float, Isometry, AutoAlign);
run_trans(TV, float, Isometry, DontAlign);
run_trans(TV, double, Isometry, AutoAlign);
run_trans(TV, double, Isometry, DontAlign);
run_trans(TV, float, Projective, AutoAlign);
run_trans(TV, float, Projective, DontAlign);
run_trans(TV, double, Projective, AutoAlign);
run_trans(TV, double, Projective, DontAlign);
cout << "trans = trans1.matrix() * trans.matrix()" << endl;
run_trans(TMATVMAT, float, Isometry, AutoAlign);
run_trans(TMATVMAT, float, Isometry, DontAlign);
run_trans(TMATVMAT, double, Isometry, AutoAlign);
run_trans(TMATVMAT, double, Isometry, DontAlign);
cout << "trans = trans1.matrix() * trans.matrix()" << endl;
run_trans(TMATVMAT, float, Isometry, AutoAlign);
run_trans(TMATVMAT, float, Isometry, DontAlign);
run_trans(TMATVMAT, double, Isometry, AutoAlign);
run_trans(TMATVMAT, double, Isometry, DontAlign);
}

View File

@ -14,122 +14,118 @@ using namespace Eigen;
typedef float Scalar;
// Forward declarations of the three benchVec overloads (raw pointers,
// MatrixXf, VectorXf). All are marked noinline so each call made from the
// timing loops in main stays an out-of-line call.
// NOTE(review): the three declarations appear twice (old formatting, then the
// clang-formatted copy).
__attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
__attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
__attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
__attribute__((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
__attribute__((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
__attribute__((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
// Entry point of the vector-add benchmark.
// Allocates three Scalar buffers of size2 = (SIZE * 8)^2 elements; `b` is
// allocated with 4 extra elements and offset by one element (presumably to
// make it deliberately misaligned relative to `a` -- TODO confirm). It then
// times 10 calls of the raw-pointer benchVec and prints the timer value and a
// derived rate labelled "GFlops".
// The `return 0;` right after that first report makes the MatrixXf / VectorXf
// sections that follow unreachable; they are kept as in the original.
// The buffers are never released. argc/argv are not used.
// NOTE(review): this span shows the same function twice, old formatting and
// clang-formatted, interleaved; the bound fix below is applied to both copies.
int main(int argc, char* argv[])
{
int size = SIZE * 8;
int size2 = size * size;
Scalar* a = internal::aligned_new<Scalar>(size2);
Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
Scalar* c = internal::aligned_new<Scalar>(size2);
// Zero every element benchVec(a, b, c, size2) is handed; the loop used to
// stop at `size`, leaving the rest of the buffers uninitialised.
for (int i=0; i<size2; ++i)
{
a[i] = b[i] = c[i] = 0;
}
BenchTimer timer;
timer.reset();
for (int k=0; k<10; ++k)
{
int main(int argc, char* argv[]) {
int size = SIZE * 8;
int size2 = size * size;
Scalar* a = internal::aligned_new<Scalar>(size2);
Scalar* b = internal::aligned_new<Scalar>(size2 + 4) + 1;
Scalar* c = internal::aligned_new<Scalar>(size2);
// Zero every element benchVec(a, b, c, size2) is handed; the loop used to
// stop at `size`, leaving the rest of the buffers uninitialised.
for (int i = 0; i < size2; ++i) {
a[i] = b[i] = c[i] = 0;
}
BenchTimer timer;
timer.reset();
for (int k = 0; k < 10; ++k) {
timer.start();
benchVec(a, b, c, size2);
timer.stop();
}
std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
<< " GFlops\n";
// Early exit: everything below this point in main is dead code.
return 0;
for (int innersize = size; innersize > 2; --innersize) {
if (size2 % innersize == 0) {
int outersize = size2 / innersize;
MatrixXf ma = Map<MatrixXf>(a, innersize, outersize);
MatrixXf mb = Map<MatrixXf>(b, innersize, outersize);
MatrixXf mc = Map<MatrixXf>(c, innersize, outersize);
timer.reset();
for (int k = 0; k < 3; ++k) {
timer.start();
benchVec(a, b, c, size2);
benchVec(ma, mb, mc);
timer.stop();
}
std::cout << innersize << " x " << outersize << " " << timer.value() << "s "
<< (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.) << " GFlops\n";
}
std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
// Early exit: everything below this point in main is dead code.
return 0;
for (int innersize = size; innersize>2 ; --innersize)
{
if (size2%innersize==0)
{
int outersize = size2/innersize;
MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
timer.reset();
for (int k=0; k<3; ++k)
{
timer.start();
benchVec(ma, mb, mc);
timer.stop();
}
std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
}
}
VectorXf va = Map<VectorXf>(a, size2);
VectorXf vb = Map<VectorXf>(b, size2);
VectorXf vc = Map<VectorXf>(c, size2);
timer.reset();
for (int k = 0; k < 3; ++k) {
timer.start();
benchVec(va, vb, vc);
timer.stop();
}
std::cout << timer.value() << "s " << (double(size2 * REPEAT) / timer.value()) / (1024. * 1024. * 1024.)
<< " GFlops\n";
return 0;
}
// MatrixXf overload: evaluates `a = a + b` REPEAT times, accumulating into a.
// Parameter `c` is accepted but not used.
void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) {
for (int k = 0; k < REPEAT; ++k) a = a + b;
}
// VectorXf overload: evaluates `a = a + b` REPEAT times, accumulating into a.
// Parameter `c` is accepted but not used.
void benchVec(VectorXf& a, VectorXf& b, VectorXf& c) {
for (int k = 0; k < REPEAT; ++k) a = a + b;
}
void benchVec(Scalar* a, Scalar* b, Scalar* c, int size) {
typedef internal::packet_traits<Scalar>::type PacketScalar;
const int PacketSize = internal::packet_traits<Scalar>::size;
PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
for (int k = 0; k < REPEAT; ++k)
for (int i = 0; i < size; i += PacketSize * 8) {
// a0 = internal::pload(&a[i]);
// b0 = internal::pload(&b[i]);
// a1 = internal::pload(&a[i+1*PacketSize]);
// b1 = internal::pload(&b[i+1*PacketSize]);
// a2 = internal::pload(&a[i+2*PacketSize]);
// b2 = internal::pload(&b[i+2*PacketSize]);
// a3 = internal::pload(&a[i+3*PacketSize]);
// b3 = internal::pload(&b[i+3*PacketSize]);
// internal::pstore(&a[i], internal::padd(a0, b0));
// a0 = internal::pload(&a[i+4*PacketSize]);
// b0 = internal::pload(&b[i+4*PacketSize]);
//
// internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
// a1 = internal::pload(&a[i+5*PacketSize]);
// b1 = internal::pload(&b[i+5*PacketSize]);
//
// internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
// a2 = internal::pload(&a[i+6*PacketSize]);
// b2 = internal::pload(&b[i+6*PacketSize]);
//
// internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
// a3 = internal::pload(&a[i+7*PacketSize]);
// b3 = internal::pload(&b[i+7*PacketSize]);
//
// internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
// internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
// internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
// internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
internal::pstore(&a[i + 2 * PacketSize], internal::padd(internal::ploadu(&a[i + 2 * PacketSize]),
internal::ploadu(&b[i + 2 * PacketSize])));
internal::pstore(&a[i + 3 * PacketSize], internal::padd(internal::ploadu(&a[i + 3 * PacketSize]),
internal::ploadu(&b[i + 3 * PacketSize])));
internal::pstore(&a[i + 4 * PacketSize], internal::padd(internal::ploadu(&a[i + 4 * PacketSize]),
internal::ploadu(&b[i + 4 * PacketSize])));
internal::pstore(&a[i + 5 * PacketSize], internal::padd(internal::ploadu(&a[i + 5 * PacketSize]),
internal::ploadu(&b[i + 5 * PacketSize])));
internal::pstore(&a[i + 6 * PacketSize], internal::padd(internal::ploadu(&a[i + 6 * PacketSize]),
internal::ploadu(&b[i + 6 * PacketSize])));
internal::pstore(&a[i + 7 * PacketSize], internal::padd(internal::ploadu(&a[i + 7 * PacketSize]),
internal::ploadu(&b[i + 7 * PacketSize])));
}
VectorXf va = Map<VectorXf>(a, size2);
VectorXf vb = Map<VectorXf>(b, size2);
VectorXf vc = Map<VectorXf>(c, size2);
timer.reset();
for (int k=0; k<3; ++k)
{
timer.start();
benchVec(va, vb, vc);
timer.stop();
}
std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
return 0;
}
// MatrixXf overload (old-formatting copy): evaluates `a = a + b` REPEAT
// times, accumulating into a. Parameter `c` is accepted but not used.
void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
{
for (int k=0; k<REPEAT; ++k)
a = a + b;
}
// VectorXf overload (old-formatting copy): evaluates `a = a + b` REPEAT
// times, accumulating into a. Parameter `c` is accepted but not used.
void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
{
for (int k=0; k<REPEAT; ++k)
a = a + b;
}
// Raw-pointer overload (old-formatting copy): REPEAT passes over the buffers
// in chunks of 8 * PacketSize scalars, adding b into a with Eigen's internal
// packet primitives (ploadu / padd / pstore).
//   a    - destination and left operand, `size` scalars
//   b    - right operand, `size` scalars
//   c    - not used
//   size - element count; the loop steps by PacketSize * 8 without a tail
//          loop, so it presumably expects `size` to be a multiple of that
//          (TODO confirm with the caller)
// NOTE(review): the active code only updates packets 2..7 of each chunk;
// packets 0 and 1 are handled only in the commented-out variant, while the
// caller's rate computation counts all `size` elements.
// NOTE(review): a0..a3 / b0..b3 are referenced only by the commented-out code.
void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
{
typedef internal::packet_traits<Scalar>::type PacketScalar;
const int PacketSize = internal::packet_traits<Scalar>::size;
PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
for (int k=0; k<REPEAT; ++k)
for (int i=0; i<size; i+=PacketSize*8)
{
// a0 = internal::pload(&a[i]);
// b0 = internal::pload(&b[i]);
// a1 = internal::pload(&a[i+1*PacketSize]);
// b1 = internal::pload(&b[i+1*PacketSize]);
// a2 = internal::pload(&a[i+2*PacketSize]);
// b2 = internal::pload(&b[i+2*PacketSize]);
// a3 = internal::pload(&a[i+3*PacketSize]);
// b3 = internal::pload(&b[i+3*PacketSize]);
// internal::pstore(&a[i], internal::padd(a0, b0));
// a0 = internal::pload(&a[i+4*PacketSize]);
// b0 = internal::pload(&b[i+4*PacketSize]);
//
// internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
// a1 = internal::pload(&a[i+5*PacketSize]);
// b1 = internal::pload(&b[i+5*PacketSize]);
//
// internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
// a2 = internal::pload(&a[i+6*PacketSize]);
// b2 = internal::pload(&b[i+6*PacketSize]);
//
// internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
// a3 = internal::pload(&a[i+7*PacketSize]);
// b3 = internal::pload(&b[i+7*PacketSize]);
//
// internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
// internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
// internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
// internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize])));
internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize])));
internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize])));
internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize])));
internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize])));
internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize])));
}
}

View File

@ -3,7 +3,7 @@
// icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp && OMP_NUM_THREADS=2 ./a.out
// Compilation options:
//
//
// -DSCALAR=std::complex<double>
// -DSCALARA=double or -DSCALARB=double
// -DHAVE_BLAS
@ -14,7 +14,6 @@
#include <bench/BenchTimer.h>
#include <Eigen/Core>
using namespace std;
using namespace Eigen;
@ -45,15 +44,15 @@ const int opt_B = ColMajor;
// Type aliases used by the GEMM benchmark:
//   Scalar     - result scalar type, taken from the SCALAR macro
//   RealScalar - NumTraits<Scalar>::Real
//   A, B       - dynamic-size operand matrices over SCALARA / SCALARB with
//                storage options opt_A / opt_B
//   C          - dynamic-size result matrix over Scalar
//   M          - dynamic-size matrix over RealScalar
// NOTE(review): the A/B/C/M typedefs appear twice (old formatting, then the
// clang-formatted copy).
typedef SCALAR Scalar;
typedef NumTraits<Scalar>::Real RealScalar;
typedef Matrix<SCALARA,Dynamic,Dynamic,opt_A> A;
typedef Matrix<SCALARB,Dynamic,Dynamic,opt_B> B;
typedef Matrix<Scalar,Dynamic,Dynamic> C;
typedef Matrix<RealScalar,Dynamic,Dynamic> M;
typedef Matrix<SCALARA, Dynamic, Dynamic, opt_A> A;
typedef Matrix<SCALARB, Dynamic, Dynamic, opt_B> B;
typedef Matrix<Scalar, Dynamic, Dynamic> C;
typedef Matrix<RealScalar, Dynamic, Dynamic> M;
#ifdef HAVE_BLAS
extern "C" {
#include <Eigen/src/misc/blas.h>
#include <Eigen/src/misc/blas.h>
}
static float fone = 1;
@ -65,7 +64,7 @@ static std::complex<float> cfzero = 0;
static std::complex<double> cdone = 1;
static std::complex<double> cdzero = 0;
static char notrans = 'N';
static char trans = 'T';
static char trans = 'T';
static char nonunit = 'N';
static char lower = 'L';
static char right = 'R';
@ -83,60 +82,61 @@ const char transB = trans;
const char transB = notrans;
#endif
// Single-precision BLAS product: forwards a, b and c to sgemm_.
//   M, N come from c's rows/cols, K from a.cols(); lda/ldb are the operands'
//   outer strides and ldc is c.rows().
//   &fone is passed for both scalar factors, so with fone == 1 the call
//   accumulates the product into c (standard GEMM alpha/beta semantics).
//   transA / transB are file-level constants defined outside this span.
// The template parameters A and B (and the local M) reuse the names of the
// file-level typedefs A, B and M and hide them inside this function.
// NOTE(review): this span shows the same function twice, old formatting and
// clang-formatted, interleaved.
template<typename A,typename B>
void blas_gemm(const A& a, const B& b, MatrixXf& c)
{
int M = c.rows(); int N = c.cols(); int K = a.cols();
int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
template <typename A, typename B>
void blas_gemm(const A& a, const B& b, MatrixXf& c) {
int M = c.rows();
int N = c.cols();
int K = a.cols();
int lda = a.outerStride();
int ldb = b.outerStride();
int ldc = c.rows();
sgemm_(&transA,&transB,&M,&N,&K,&fone,
const_cast<float*>(a.data()),&lda,
const_cast<float*>(b.data()),&ldb,&fone,
c.data(),&ldc);
sgemm_(&transA, &transB, &M, &N, &K, &fone, const_cast<float*>(a.data()), &lda, const_cast<float*>(b.data()), &ldb,
&fone, c.data(), &ldc);
}
// Double-precision BLAS product: forwards a, b and c to dgemm_.
//   M, N come from c's rows/cols, K from a.cols(); lda/ldb are the operands'
//   outer strides and ldc is c.rows().
//   &done is passed for both scalar factors (presumably 1.0, defined outside
//   this span), which accumulates the product into c under standard GEMM
//   alpha/beta semantics.
// The template parameters A and B (and the local M) hide the file-level
// typedefs of the same names inside this function.
// NOTE(review): this span shows the same function twice, old formatting and
// clang-formatted, interleaved.
template<typename A,typename B>
void blas_gemm(const A& a, const B& b, MatrixXd& c)
{
int M = c.rows(); int N = c.cols(); int K = a.cols();
int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
template <typename A, typename B>
void blas_gemm(const A& a, const B& b, MatrixXd& c) {
int M = c.rows();
int N = c.cols();
int K = a.cols();
int lda = a.outerStride();
int ldb = b.outerStride();
int ldc = c.rows();
dgemm_(&transA,&transB,&M,&N,&K,&done,
const_cast<double*>(a.data()),&lda,
const_cast<double*>(b.data()),&ldb,&done,
c.data(),&ldc);
dgemm_(&transA, &transB, &M, &N, &K, &done, const_cast<double*>(a.data()), &lda, const_cast<double*>(b.data()), &ldb,
&done, c.data(), &ldc);
}
// Complex single-precision BLAS product: forwards a, b and c to cgemm_,
// passing the complex data and the complex constant cfone reinterpreted as
// float pointers.
//   M, N come from c's rows/cols, K from a.cols(); lda/ldb are the operands'
//   outer strides and ldc is c.rows().
//   cfone is used for both scalar factors (presumably 1, defined outside this
//   span), which accumulates the product into c under standard GEMM semantics.
// The template parameters A and B (and the local M) hide the file-level
// typedefs of the same names inside this function.
// NOTE(review): this span shows the same function twice, old formatting and
// clang-formatted, interleaved.
template<typename A,typename B>
void blas_gemm(const A& a, const B& b, MatrixXcf& c)
{
int M = c.rows(); int N = c.cols(); int K = a.cols();
int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
template <typename A, typename B>
void blas_gemm(const A& a, const B& b, MatrixXcf& c) {
int M = c.rows();
int N = c.cols();
int K = a.cols();
int lda = a.outerStride();
int ldb = b.outerStride();
int ldc = c.rows();
cgemm_(&transA,&transB,&M,&N,&K,(float*)&cfone,
const_cast<float*>((const float*)a.data()),&lda,
const_cast<float*>((const float*)b.data()),&ldb,(float*)&cfone,
(float*)c.data(),&ldc);
cgemm_(&transA, &transB, &M, &N, &K, (float*)&cfone, const_cast<float*>((const float*)a.data()), &lda,
const_cast<float*>((const float*)b.data()), &ldb, (float*)&cfone, (float*)c.data(), &ldc);
}
// Complex double-precision BLAS product: forwards a, b and c to zgemm_,
// passing the complex data and the complex constant cdone (== 1) reinterpreted
// as double pointers.
//   M, N come from c's rows/cols, K from a.cols(); lda/ldb are the operands'
//   outer strides and ldc is c.rows().
//   cdone is used for both scalar factors, which accumulates the product into
//   c under standard GEMM alpha/beta semantics.
// The template parameters A and B (and the local M) hide the file-level
// typedefs of the same names inside this function.
// NOTE(review): this span shows the same function twice, old formatting and
// clang-formatted, interleaved.
template<typename A,typename B>
void blas_gemm(const A& a, const B& b, MatrixXcd& c)
{
int M = c.rows(); int N = c.cols(); int K = a.cols();
int lda = a.outerStride(); int ldb = b.outerStride(); int ldc = c.rows();
template <typename A, typename B>
void blas_gemm(const A& a, const B& b, MatrixXcd& c) {
int M = c.rows();
int N = c.cols();
int K = a.cols();
int lda = a.outerStride();
int ldb = b.outerStride();
int ldc = c.rows();
zgemm_(&transA,&transB,&M,&N,&K,(double*)&cdone,
const_cast<double*>((const double*)a.data()),&lda,
const_cast<double*>((const double*)b.data()),&ldb,(double*)&cdone,
(double*)c.data(),&ldc);
zgemm_(&transA, &transB, &M, &N, &K, (double*)&cdone, const_cast<double*>((const double*)a.data()), &lda,
const_cast<double*>((const double*)b.data()), &ldb, (double*)&cdone, (double*)c.data(), &ldc);
}
#endif
void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci)
{
void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci) {
cr.noalias() += ar * br;
cr.noalias() -= ai * bi;
ci.noalias() += ar * bi;
@ -144,33 +144,27 @@ void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr,
// [cr ci] += [ar ai] * br + [-ai ar] * bi
}
// Real-times-complex product with the complex operand and result stored as
// separate real/imaginary matrices: accumulates a * br into cr and a * bi
// into ci (both via noalias()).
// NOTE(review): the signature line appears twice (old brace style, then the
// clang-formatted one).
void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci)
{
void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci) {
cr.noalias() += a * br;
ci.noalias() += a * bi;
}
// Complex-times-real product with the complex operand and result stored as
// separate real/imaginary matrices: accumulates ar * b into cr and ai * b
// into ci (both via noalias()).
// NOTE(review): the signature line appears twice (old brace style, then the
// clang-formatted one).
void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci)
{
void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci) {
cr.noalias() += ar * b;
ci.noalias() += ai * b;
}
// The Eigen product being benchmarked: accumulates a * b into c through
// noalias(). Marked EIGEN_DONT_INLINE, presumably so the timed call is not
// folded into the caller -- confirm against the macro's definition.
// The template parameters A, B, C hide the file-level typedefs of the same
// names inside this function.
// NOTE(review): the template/signature lines appear twice (old formatting,
// then the clang-formatted copy).
template<typename A, typename B, typename C>
EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c)
{
template <typename A, typename B, typename C>
EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) {
c.noalias() += a * b;
}
int main(int argc, char ** argv)
{
int main(int argc, char** argv) {
std::ptrdiff_t l1 = internal::queryL1CacheSize();
std::ptrdiff_t l2 = internal::queryTopLevelCacheSize();
std::cout << "L1 cache size = " << (l1>0 ? l1/1024 : -1) << " KB\n";
std::cout << "L2/L3 cache size = " << (l2>0 ? l2/1024 : -1) << " KB\n";
typedef internal::gebp_traits<Scalar,Scalar> Traits;
std::cout << "L1 cache size = " << (l1 > 0 ? l1 / 1024 : -1) << " KB\n";
std::cout << "L2/L3 cache size = " << (l2 > 0 ? l2 / 1024 : -1) << " KB\n";
typedef internal::gebp_traits<Scalar, Scalar> Traits;
std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n";
int rep = 1; // number of repetitions per try
@ -180,196 +174,220 @@ int main(int argc, char ** argv)
int m = s;
int n = s;
int p = s;
int cache_size1=-1, cache_size2=l2, cache_size3 = 0;
int cache_size1 = -1, cache_size2 = l2, cache_size3 = 0;
bool need_help = false;
for (int i=1; i<argc;)
{
if(argv[i][0]=='-')
{
if(argv[i][1]=='s')
{
for (int i = 1; i < argc;) {
if (argv[i][0] == '-') {
if (argv[i][1] == 's') {
++i;
s = atoi(argv[i++]);
m = n = p = s;
if(argv[i][0]!='-')
{
if (argv[i][0] != '-') {
n = atoi(argv[i++]);
p = atoi(argv[i++]);
}
}
else if(argv[i][1]=='c')
{
} else if (argv[i][1] == 'c') {
++i;
cache_size1 = atoi(argv[i++]);
if(argv[i][0]!='-')
{
if (argv[i][0] != '-') {
cache_size2 = atoi(argv[i++]);
if(argv[i][0]!='-')
cache_size3 = atoi(argv[i++]);
if (argv[i][0] != '-') cache_size3 = atoi(argv[i++]);
}
}
else if(argv[i][1]=='t')
{
} else if (argv[i][1] == 't') {
tries = atoi(argv[++i]);
++i;
}
else if(argv[i][1]=='p')
{
} else if (argv[i][1] == 'p') {
++i;
rep = atoi(argv[i++]);
}
}
else
{
} else {
need_help = true;
break;
}
}
if(need_help)
{
if (need_help) {
std::cout << argv[0] << " -s <matrix sizes> -c <cache sizes> -t <nb tries> -p <nb repeats>\n";
std::cout << " <matrix sizes> : size\n";
std::cout << " <matrix sizes> : rows columns depth\n";
return 1;
}
#if EIGEN_VERSION_AT_LEAST(3,2,90)
if(cache_size1>0)
setCpuCacheSizes(cache_size1,cache_size2,cache_size3);
#if EIGEN_VERSION_AT_LEAST(3, 2, 90)
if (cache_size1 > 0) setCpuCacheSizes(cache_size1, cache_size2, cache_size3);
#endif
A a(m,p); a.setRandom();
B b(p,n); b.setRandom();
C c(m,n); c.setOnes();
A a(m, p);
a.setRandom();
B b(p, n);
b.setRandom();
C c(m, n);
c.setOnes();
C rc = c;
std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n";
std::ptrdiff_t mc(m), nc(n), kc(p);
internal::computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
internal::computeProductBlockingSizes<Scalar, Scalar>(kc, mc, nc);
std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << " x " << nc << "\n";
C r = c;
// check the parallel product is correct
#if defined EIGEN_HAS_OPENMP
// check the parallel product is correct
#if defined EIGEN_HAS_OPENMP
Eigen::initParallel();
int procs = omp_get_max_threads();
if(procs>1)
{
#ifdef HAVE_BLAS
blas_gemm(a,b,r);
#else
if (procs > 1) {
#ifdef HAVE_BLAS
blas_gemm(a, b, r);
#else
omp_set_num_threads(1);
r.noalias() += a * b;
omp_set_num_threads(procs);
#endif
#endif
c.noalias() += a * b;
if(!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n";
if (!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n";
}
#elif defined HAVE_BLAS
blas_gemm(a,b,r);
c.noalias() += a * b;
if(!r.isApprox(c)) {
std::cout << (r - c).norm()/r.norm() << "\n";
#elif defined HAVE_BLAS
blas_gemm(a, b, r);
c.noalias() += a * b;
if (!r.isApprox(c)) {
std::cout << (r - c).norm() / r.norm() << "\n";
std::cerr << "Warning, your product is crap!\n\n";
}
#else
if (1. * m * n * p < 2000. * 2000 * 2000) {
gemm(a, b, c);
r.noalias() += a.cast<Scalar>().lazyProduct(b.cast<Scalar>());
if (!r.isApprox(c)) {
std::cout << (r - c).norm() / r.norm() << "\n";
std::cerr << "Warning, your product is crap!\n\n";
}
#else
if(1.*m*n*p<2000.*2000*2000)
{
gemm(a,b,c);
r.noalias() += a.cast<Scalar>() .lazyProduct( b.cast<Scalar>() );
if(!r.isApprox(c)) {
std::cout << (r - c).norm()/r.norm() << "\n";
std::cerr << "Warning, your product is crap!\n\n";
}
}
#endif
}
#endif
#ifdef HAVE_BLAS
#ifdef HAVE_BLAS
BenchTimer tblas;
c = rc;
BENCH(tblas, tries, rep, blas_gemm(a,b,c));
std::cout << "blas cpu " << tblas.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER) << "s)\n";
std::cout << "blas real " << tblas.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n";
#endif
BENCH(tblas, tries, rep, blas_gemm(a, b, c));
std::cout << "blas cpu " << tblas.best(CPU_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / tblas.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER)
<< "s)\n";
std::cout << "blas real " << tblas.best(REAL_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / tblas.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER)
<< "s)\n";
#endif
// warm start
if(b.norm()+a.norm()==123.554) std::cout << "\n";
if (b.norm() + a.norm() == 123.554) std::cout << "\n";
BenchTimer tmt;
c = rc;
BENCH(tmt, tries, rep, gemm(a,b,c));
std::cout << "eigen cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n";
std::cout << "eigen real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
BENCH(tmt, tries, rep, gemm(a, b, c));
std::cout << "eigen cpu " << tmt.best(CPU_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER)
<< "s)\n";
std::cout << "eigen real " << tmt.best(REAL_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER)
<< "s)\n";
#ifdef EIGEN_HAS_OPENMP
if(procs>1)
{
#ifdef EIGEN_HAS_OPENMP
if (procs > 1) {
BenchTimer tmono;
omp_set_num_threads(1);
Eigen::setNbThreads(1);
c = rc;
BENCH(tmono, tries, rep, gemm(a,b,c));
std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n";
std::cout << "eigen mono real " << tmono.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n";
std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n";
BENCH(tmono, tries, rep, gemm(a, b, c));
std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / tmono.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER)
<< "s)\n";
std::cout << "eigen mono real " << tmono.best(REAL_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / tmono.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t("
<< tmono.total(REAL_TIMER) << "s)\n";
std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => "
<< (100.0 * tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)) / procs << "%\n";
}
#endif
if(1.*m*n*p<30*30*30)
{
#endif
if (1. * m * n * p < 30 * 30 * 30) {
BenchTimer tmt;
c = rc;
BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b));
std::cout << "lazy cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n";
std::cout << "lazy real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
BENCH(tmt, tries, rep, c.noalias() += a.lazyProduct(b));
std::cout << "lazy cpu " << tmt.best(CPU_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / tmt.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER)
<< "s)\n";
std::cout << "lazy real " << tmt.best(REAL_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / tmt.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER)
<< "s)\n";
}
#ifdef DECOUPLED
if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
{
M ar(m,p); ar.setRandom();
M ai(m,p); ai.setRandom();
M br(p,n); br.setRandom();
M bi(p,n); bi.setRandom();
M cr(m,n); cr.setRandom();
M ci(m,n); ci.setRandom();
#ifdef DECOUPLED
if ((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) {
M ar(m, p);
ar.setRandom();
M ai(m, p);
ai.setRandom();
M br(p, n);
br.setRandom();
M bi(p, n);
bi.setRandom();
M cr(m, n);
cr.setRandom();
M ci(m, n);
ci.setRandom();
BenchTimer t;
BENCH(t, tries, rep, matlab_cplx_cplx(ar,ai,br,bi,cr,ci));
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n";
std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
BENCH(t, tries, rep, matlab_cplx_cplx(ar, ai, br, bi, cr, ci));
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
<< "s)\n";
std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
<< "s)\n";
}
if((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
{
M a(m,p); a.setRandom();
M br(p,n); br.setRandom();
M bi(p,n); bi.setRandom();
M cr(m,n); cr.setRandom();
M ci(m,n); ci.setRandom();
if ((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex)) {
M a(m, p);
a.setRandom();
M br(p, n);
br.setRandom();
M bi(p, n);
bi.setRandom();
M cr(m, n);
cr.setRandom();
M ci(m, n);
ci.setRandom();
BenchTimer t;
BENCH(t, tries, rep, matlab_real_cplx(a,br,bi,cr,ci));
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n";
std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
BENCH(t, tries, rep, matlab_real_cplx(a, br, bi, cr, ci));
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
<< "s)\n";
std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
<< "s)\n";
}
if((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex))
{
M ar(m,p); ar.setRandom();
M ai(m,p); ai.setRandom();
M b(p,n); b.setRandom();
M cr(m,n); cr.setRandom();
M ci(m,n); ci.setRandom();
if ((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex)) {
M ar(m, p);
ar.setRandom();
M ai(m, p);
ai.setRandom();
M b(p, n);
b.setRandom();
M cr(m, n);
cr.setRandom();
M ci(m, n);
ci.setRandom();
BenchTimer t;
BENCH(t, tries, rep, matlab_cplx_real(ar,ai,b,cr,ci));
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n";
std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
BENCH(t, tries, rep, matlab_cplx_real(ar, ai, b, cr, ci));
std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / t.best(CPU_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER)
<< "s)\n";
std::cout << "\"matlab\" real " << t.best(REAL_TIMER) / rep << "s \t"
<< (double(m) * n * p * rep * 2 / t.best(REAL_TIMER)) * 1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER)
<< "s)\n";
}
#endif
#endif
return 0;
}

View File

@ -16,23 +16,20 @@
#include <utility>
// Copy round trip used as the baseline measurement: copy-constructs a
// temporary from m, then copy-assigns the temporary back into m.
// NOTE(review): the signature line appears twice (old brace style, then the
// clang-formatted one).
template <typename MatrixType>
void copy_matrix(MatrixType& m)
{
void copy_matrix(MatrixType& m) {
MatrixType tmp(m);
m = tmp;
}
// Move-semantics workload for the benchmark: move-constructs a temporary
// from `m`, then move-assigns that temporary back into `m`, so `m` has been
// assigned to again by the time the function returns.
template <typename MatrixType>
void move_matrix(MatrixType&& m)
{
void move_matrix(MatrixType&& m) {
MatrixType tmp(std::move(m));
m = std::move(tmp);
}
template<typename Scalar>
void bench(const std::string& label)
{
using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>,1,10>;
template <typename Scalar>
void bench(const std::string& label) {
using MatrixType = Eigen::Matrix<Eigen::MovableScalar<Scalar>, 1, 10>;
Eigen::BenchTimer t;
int tries = 10;
@ -42,16 +39,14 @@ void bench(const std::string& label)
MatrixType dest;
BENCH(t, tries, rep, copy_matrix(data));
std::cout << label << " copy semantics: " << 1e3*t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
std::cout << label << " copy semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
BENCH(t, tries, rep, move_matrix(std::move(data)));
std::cout << label << " move semantics: " << 1e3*t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
std::cout << label << " move semantics: " << 1e3 * t.best(Eigen::CPU_TIMER) << " ms" << std::endl;
}
// Entry point: runs bench<>() once for float and once for double, passing the
// matching text label that bench() prints in front of its timings.
int main()
{
int main() {
bench<float>("float");
bench<double>("double");
return 0;
}

View File

@ -5,79 +5,64 @@
using namespace Eigen;
using namespace std;
// Non-inlined wrapper that returns v.norm(); lets the benchmark time the call
// through a uniform free-function interface.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v)
{
template <typename T>
EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) {
return v.norm();
}
// Non-inlined wrapper that returns v.stableNorm().
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v)
{
template <typename T>
EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) {
return v.stableNorm();
}
// Non-inlined wrapper that returns v.hypotNorm().
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v)
{
template <typename T>
EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) {
return v.hypotNorm();
}
// Non-inlined wrapper that returns v.blueNorm().
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v)
{
template <typename T>
EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) {
return v.blueNorm();
}
/**
 * Euclidean norm computed with a running scale / scaled-sum-of-squares
 * recurrence (the scheme used by the classic reference xNRM2 routines).
 *
 * Invariant after each processed entry: scale * scale * ssq equals the sum of
 * squares of the entries seen so far, where `scale` is the largest magnitude
 * seen so far. Only ratios <= 1 are squared, which avoids overflow and
 * underflow of the intermediate squares.
 *
 * @param v  vector-like object providing size() and coeff(i)
 * @return   scale * sqrt(ssq); 0 for an empty or all-zero input
 */
template <typename T>
EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) {
  typedef typename T::Scalar Scalar;
  const int n = v.size();
  Scalar scale = 0;
  Scalar ssq = 1;
  for (int i = 0; i < n; ++i) {
    const Scalar ax = std::abs(v.coeff(i));
    // A zero entry contributes nothing. It must be skipped explicitly:
    // while `scale` is still 0 the test `scale >= ax` holds and ax / scale
    // would be 0 / 0 = NaN, poisoning the whole result.
    if (ax == Scalar(0)) continue;
    if (scale >= ax) {
      ssq += numext::abs2(ax / scale);
    } else {
      // New largest magnitude: rescale the accumulated sum to the new scale.
      ssq = Scalar(1) + ssq * numext::abs2(scale / ax);
      scale = ax;
    }
  }
  return scale * std::sqrt(ssq);
}
/**
 * Two-pass scaled norm: the first pass finds the largest absolute coefficient
 * `s`, the second computes the norm of v / s and multiplies the result by `s`.
 *
 * @param v  Eigen vector expression
 * @return   s * ||v / s||, or 0 when every coefficient is zero
 */
template <typename T>
EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) {
  typedef typename T::Scalar Scalar;
  const Scalar s = v.array().abs().maxCoeff();
  // All coefficients are zero: dividing by s would produce 0 / 0 = NaN, so
  // return the (zero) norm directly.
  if (s == Scalar(0)) return s;
  return s * (v / s).norm();
}
// Non-inlined wrapper that returns v.stableNorm(). Despite the distinct name,
// the body is the same call as the stableNorm() wrapper in this file.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v)
{
template <typename T>
EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) {
return v.stableNorm();
}
// Pairwise-reduction norm computed in place.
//
// First pass: for i < size/2, v(i) is overwritten with the sum of squares of
// v(2i) and v(2i+1). Following passes: the leading entries are repeatedly
// summed in pairs, halving the count each time, until one value is left in
// v(0); the function returns sqrt(v(0)).
//
// WARNING: the input vector is overwritten.
// Integer halving means that when a pass has an odd number of entries, the
// last one is not read by the next pass; when size() < 2 the first pass does
// not run and v(0) is returned un-squared.
// NOTE(review): looks intended for power-of-two sizes >= 2 only -- confirm.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
{
int n =v.size() / 2;
for (int i=0;i<n;++i)
v(i) = v(2*i)*v(2*i) + v(2*i+1)*v(2*i+1);
n = n/2;
while (n>0)
{
for (int i=0;i<n;++i)
v(i) = v(2*i) + v(2*i+1);
n = n/2;
template <typename T>
EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) {
int n = v.size() / 2;
for (int i = 0; i < n; ++i) v(i) = v(2 * i) * v(2 * i) + v(2 * i + 1) * v(2 * i + 1);
n = n / 2;
while (n > 0) {
for (int i = 0; i < n; ++i) v(i) = v(2 * i) + v(2 * i + 1);
n = n / 2;
}
return std::sqrt(v(0));
}
@ -85,61 +70,61 @@ EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
// Extra packet helpers injected into Eigen::internal for this benchmark.
// They are only compiled when EIGEN_VECTORIZE is defined and forward
// directly to the SSE intrinsics named in their bodies:
//   plt(a, b)     -> _mm_cmplt_ps / _mm_cmplt_pd (a, b)
//   pandnot(a, b) -> _mm_andnot_ps / _mm_andnot_pd (a, b)
// The second parameter is a NON-const lvalue reference, so these overloads
// can only be selected when the second argument is a modifiable lvalue;
// a temporary or const argument cannot bind to it.
// NOTE(review): the argument order passed to _mm_andnot_* may differ from the
// operand order of Eigen's own pandnot -- confirm before reusing.
namespace Eigen {
namespace internal {
#ifdef EIGEN_VECTORIZE
Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); }
Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }
Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a, b); }
Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a, b); }
Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); }
Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); }
Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a, b); }
Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a, b); }
#endif
}
}
} // namespace internal
} // namespace Eigen
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
{
#ifndef EIGEN_VECTORIZE
template <typename T>
EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) {
#ifndef EIGEN_VECTORIZE
return v.blueNorm();
#else
#else
typedef typename T::Scalar Scalar;
static int nmax = 0;
static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;
int n;
if(nmax <= 0)
{
if (nmax <= 0) {
int nbig, ibeta, it, iemin, iemax, iexp;
Scalar abig, eps;
nbig = NumTraits<int>::highest(); // largest integer
ibeta = std::numeric_limits<Scalar>::radix; // NumTraits<Scalar>::Base; // base for floating-point numbers
it = NumTraits<Scalar>::digits(); // NumTraits<Scalar>::Mantissa; // number of base-beta digits in mantissa
nbig = NumTraits<int>::highest(); // largest integer
ibeta = std::numeric_limits<Scalar>::radix; // NumTraits<Scalar>::Base; // base for
// floating-point numbers
it = NumTraits<Scalar>::digits(); // NumTraits<Scalar>::Mantissa; // number of base-beta digits in
// mantissa
iemin = NumTraits<Scalar>::min_exponent(); // minimum exponent
iemax = NumTraits<Scalar>::max_exponent(); // maximum exponent
rbig = NumTraits<Scalar>::highest(); // largest floating-point number
rbig = NumTraits<Scalar>::highest(); // largest floating-point number
// Check the basic machine-dependent constants.
if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5)
|| (it<=4 && ibeta <= 3 ) || it<2)
{
if (iemin > 1 - 2 * it || 1 + it > iemax || (it == 2 && ibeta < 5) || (it <= 4 && ibeta <= 3) || it < 2) {
eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
}
iexp = -((1-iemin)/2);
b1 = std::pow(ibeta, iexp); // lower boundary of midrange
iexp = (iemax + 1 - it)/2;
b2 = std::pow(ibeta,iexp); // upper boundary of midrange
iexp = -((1 - iemin) / 2);
b1 = std::pow(ibeta, iexp); // lower boundary of midrange
iexp = (iemax + 1 - it) / 2;
b2 = std::pow(ibeta, iexp); // upper boundary of midrange
iexp = (2-iemin)/2;
s1m = std::pow(ibeta,iexp); // scaling factor for lower range
iexp = - ((iemax+it)/2);
s2m = std::pow(ibeta,iexp); // scaling factor for upper range
iexp = (2 - iemin) / 2;
s1m = std::pow(ibeta, iexp); // scaling factor for lower range
iexp = -((iemax + it) / 2);
s2m = std::pow(ibeta, iexp); // scaling factor for upper range
overfl = rbig*s2m; // overflow boundary for abig
eps = std::pow(ibeta, 1-it);
relerr = std::sqrt(eps); // tolerance for neglecting asml
abig = 1.0/eps - 1.0;
if (Scalar(nbig)>abig) nmax = abig; // largest safe n
else nmax = nbig;
overfl = rbig * s2m; // overflow boundary for abig
eps = std::pow(ibeta, 1 - it);
relerr = std::sqrt(eps); // tolerance for neglecting asml
abig = 1.0 / eps - 1.0;
if (Scalar(nbig) > abig)
nmax = abig; // largest safe n
else
nmax = nbig;
}
typedef typename internal::packet_traits<Scalar>::type Packet;
@ -149,108 +134,103 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
Packet pabig = internal::pset1<Packet>(Scalar(0));
Packet ps2m = internal::pset1<Packet>(s2m);
Packet ps1m = internal::pset1<Packet>(s1m);
Packet pb2 = internal::pset1<Packet>(b2);
Packet pb1 = internal::pset1<Packet>(b1);
for(int j=0; j<v.size(); j+=ps)
{
Packet pb2 = internal::pset1<Packet>(b2);
Packet pb1 = internal::pset1<Packet>(b1);
for (int j = 0; j < v.size(); j += ps) {
Packet ax = internal::pabs(v.template packet<Aligned>(j));
Packet ax_s2m = internal::pmul(ax,ps2m);
Packet ax_s1m = internal::pmul(ax,ps1m);
Packet maskBig = internal::plt(pb2,ax);
Packet maskSml = internal::plt(ax,pb1);
Packet ax_s2m = internal::pmul(ax, ps2m);
Packet ax_s1m = internal::pmul(ax, ps1m);
Packet maskBig = internal::plt(pb2, ax);
Packet maskSml = internal::plt(ax, pb1);
// Packet maskMed = internal::pand(maskSml,maskBig);
// Packet scale = internal::pset1(Scalar(0));
// scale = internal::por(scale, internal::pand(maskBig,ps2m));
// scale = internal::por(scale, internal::pand(maskSml,ps1m));
// scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
// ax = internal::pmul(ax,scale);
// ax = internal::pmul(ax,ax);
// pabig = internal::padd(pabig, internal::pand(maskBig, ax));
// pasml = internal::padd(pasml, internal::pand(maskSml, ax));
// pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
// Packet maskMed = internal::pand(maskSml,maskBig);
// Packet scale = internal::pset1(Scalar(0));
// scale = internal::por(scale, internal::pand(maskBig,ps2m));
// scale = internal::por(scale, internal::pand(maskSml,ps1m));
// scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
// ax = internal::pmul(ax,scale);
// ax = internal::pmul(ax,ax);
// pabig = internal::padd(pabig, internal::pand(maskBig, ax));
// pasml = internal::padd(pasml, internal::pand(maskSml, ax));
// pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m)));
pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m)));
pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::pand(maskSml,maskBig)));
pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m, ax_s2m)));
pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m, ax_s1m)));
pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax, ax), internal::pand(maskSml, maskBig)));
}
Scalar abig = internal::predux(pabig);
Scalar asml = internal::predux(pasml);
Scalar amed = internal::predux(pamed);
if(abig > Scalar(0))
{
if (abig > Scalar(0)) {
abig = std::sqrt(abig);
if(abig > overfl)
{
if (abig > overfl) {
eigen_assert(false && "overflow");
return rbig;
}
if(amed > Scalar(0))
{
abig = abig/s2m;
if (amed > Scalar(0)) {
abig = abig / s2m;
amed = std::sqrt(amed);
}
else
{
return abig/s2m;
} else {
return abig / s2m;
}
}
else if(asml > Scalar(0))
{
if (amed > Scalar(0))
{
} else if (asml > Scalar(0)) {
if (amed > Scalar(0)) {
abig = std::sqrt(amed);
amed = std::sqrt(asml) / s1m;
} else {
return std::sqrt(asml) / s1m;
}
else
{
return std::sqrt(asml)/s1m;
}
}
else
{
} else {
return std::sqrt(amed);
}
asml = std::min(abig, amed);
abig = std::max(abig, amed);
if(asml <= abig*relerr)
if (asml <= abig * relerr)
return abig;
else
return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig));
#endif
return abig * std::sqrt(Scalar(1) + numext::abs2(asml / abig));
#endif
}
// BENCH_PERF(NRM): times the norm function NRM.
//
// Expands to a block that expects `tries`, `iters`, `vf` and `vd` to be in
// scope at the expansion site. It runs `tries` timed rounds of `iters` calls
// of NRM(vf), accumulating the results into a float, then the same for
// NRM(vd) with a double accumulator, and finally prints the stringized
// function name followed by tf.value(), td.value() and tcf.value().
//
// The complex<float> round (vcf / tcf) is commented out inside the macro, so
// `tcf` is only reset, never started, and `ac` is never updated.
#define BENCH_PERF(NRM) { \
float af = 0; double ad = 0; std::complex<float> ac = 0; \
Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\
for (int k=0; k<tries; ++k) { \
tf.start(); \
for (int i=0; i<iters; ++i) { af += NRM(vf); } \
tf.stop(); \
} \
for (int k=0; k<tries; ++k) { \
td.start(); \
for (int i=0; i<iters; ++i) { ad += NRM(vd); } \
td.stop(); \
} \
/*for (int k=0; k<std::max(1,tries/3); ++k) { \
tcf.start(); \
for (int i=0; i<iters; ++i) { ac += NRM(vcf); } \
tcf.stop(); \
} */\
std::cout << #NRM << "\t" << tf.value() << " " << td.value() << " " << tcf.value() << "\n"; \
}
// Same macro as above, in its clang-formatted layout (one statement per line).
#define BENCH_PERF(NRM) \
{ \
float af = 0; \
double ad = 0; \
std::complex<float> ac = 0; \
Eigen::BenchTimer tf, td, tcf; \
tf.reset(); \
td.reset(); \
tcf.reset(); \
for (int k = 0; k < tries; ++k) { \
tf.start(); \
for (int i = 0; i < iters; ++i) { \
af += NRM(vf); \
} \
tf.stop(); \
} \
for (int k = 0; k < tries; ++k) { \
td.start(); \
for (int i = 0; i < iters; ++i) { \
ad += NRM(vd); \
} \
td.stop(); \
} \
/*for (int k=0; k<std::max(1,tries/3); ++k) { \
tcf.start(); \
for (int i=0; i<iters; ++i) { ac += NRM(vcf); } \
tcf.stop(); \
} */ \
std::cout << #NRM << "\t" << tf.value() << " " << td.value() << " " << tcf.value() << "\n"; \
}
void check_accuracy(double basef, double based, int s)
{
void check_accuracy(double basef, double based, int s) {
double yf = basef * std::abs(internal::random<double>());
double yd = based * std::abs(internal::random<double>());
VectorXf vf = VectorXf::Ones(s) * yf;
VectorXd vd = VectorXd::Ones(s) * yd;
std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n";
std::cout << "reference\t" << std::sqrt(double(s)) * yf << "\t" << std::sqrt(double(s)) * yd << "\n";
std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
@ -260,34 +240,38 @@ void check_accuracy(double basef, double based, int s)
std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
}
// Accuracy check on vectors whose entries span a range of magnitudes.
//
// Builds a float vector `vf` and a double vector `vd` of length `s`. Each
// entry is |random double| * 10^e, where the integer exponent e is drawn with
// internal::random<int>(ef0, ef1) for vf and internal::random<int>(ed0, ed1)
// for vd. It then prints one row per norm implementation with four columns:
// the result on vf, on vd, and on each of them cast to long double.
//
// ef0, ef1  exponent bounds passed to internal::random<int> for the float vector
// ed0, ed1  exponent bounds passed to internal::random<int> for the double vector
// s         number of entries in each vector
void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
{
void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) {
VectorXf vf(s);
VectorXd vd(s);
for (int i=0; i<s; ++i)
{
vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
for (int i = 0; i < s; ++i) {
vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0, ef1));
vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0, ed1));
}
//std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>()) << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>()) << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>()) << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
// std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
// std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>())
<< "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>())
<< "\t" << hypotNorm(vd.cast<long double>()) << "\n";
std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t"
<< blueNorm(vd.cast<long double>()) << "\n";
// The two long double columns of the pblueNorm row are produced by blueNorm,
// not pblueNorm.
std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>())
<< "\t" << blueNorm(vd.cast<long double>()) << "\n";
std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>())
<< "\t" << lapackNorm(vd.cast<long double>()) << "\n";
std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t"
<< twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
// std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long
// double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
}
int main(int argc, char** argv)
{
int main(int argc, char** argv) {
int tries = 10;
int iters = 100000;
double y = 1.1345743233455785456788e12 * internal::random<double>();
VectorXf v = VectorXf::Ones(1024) * y;
// return 0;
// return 0;
int s = 10000;
double basef_ok = 1.1345743233455785456788e15;
double based_ok = 1.1345743233455785456788e95;
@ -310,22 +294,20 @@ int main(int argc, char** argv)
check_accuracy(basef_over, based_over, s);
std::cerr << "\nVarying (over):\n";
for (int k=0; k<1; ++k)
{
check_accuracy_var(20,27,190,302,s);
for (int k = 0; k < 1; ++k) {
check_accuracy_var(20, 27, 190, 302, s);
std::cout << "\n";
}
std::cerr << "\nVarying (under):\n";
for (int k=0; k<1; ++k)
{
check_accuracy_var(-27,20,-302,-190,s);
for (int k = 0; k < 1; ++k) {
check_accuracy_var(-27, 20, -302, -190, s);
std::cout << "\n";
}
y = 1;
std::cout.precision(4);
int s1 = 1024*1024*32;
int s1 = 1024 * 1024 * 32;
std::cerr << "Performance (out of cache, " << s1 << "):\n";
{
int iters = 1;

View File

@ -15,70 +15,62 @@ using namespace Eigen;
typedef double Scalar;
// Benchmarks `b = a.reverse()` for matrices shaped like `m`.
//
// Only the dimensions of `m` are used; two random matrices `a` and `b` of the
// same shape are created locally. The assignment is repeated `repeats` times
// inside each of TRIES timed rounds, and one randomly chosen coefficient of
// `b` is added to `acc` after every assignment so the work cannot be
// discarded. One line is printed: "dyn" or "fixed" (depending on whether
// RowsAtCompileTime is Dynamic), the dimensions, a time figure and an MFLOPS
// figure derived from timerB.
//
// NOTE(review): `repeats` is (REPEAT * 1000) / size in integer arithmetic; if
// size exceeds REPEAT * 1000 it becomes 0 and the printed time figure divides
// by zero. REPEAT is defined outside this view -- confirm its value.
// timerH and timerV are declared but not used in this function.
template <typename MatrixType>
__attribute__ ((noinline)) void bench_reverse(const MatrixType& m)
{
__attribute__((noinline)) void bench_reverse(const MatrixType& m) {
int rows = m.rows();
int cols = m.cols();
int size = m.size();
int repeats = (REPEAT*1000)/size;
MatrixType a = MatrixType::Random(rows,cols);
MatrixType b = MatrixType::Random(rows,cols);
int repeats = (REPEAT * 1000) / size;
MatrixType a = MatrixType::Random(rows, cols);
MatrixType b = MatrixType::Random(rows, cols);
BenchTimer timerB, timerH, timerV;
Scalar acc = 0;
int r = internal::random<int>(0,rows-1);
int c = internal::random<int>(0,cols-1);
for (int t=0; t<TRIES; ++t)
{
int r = internal::random<int>(0, rows - 1);
int c = internal::random<int>(0, cols - 1);
for (int t = 0; t < TRIES; ++t) {
timerB.start();
for (int k=0; k<repeats; ++k)
{
for (int k = 0; k < repeats; ++k) {
// The asm statements only emit assembler comments that bracket the
// measured statement in the generated assembly.
asm("#begin foo");
b = a.reverse();
asm("#end foo");
acc += b.coeff(r,c);
acc += b.coeff(r, c);
}
timerB.stop();
}
if (MatrixType::RowsAtCompileTime==Dynamic)
if (MatrixType::RowsAtCompileTime == Dynamic)
std::cout << "dyn ";
else
std::cout << "fixed ";
std::cout << rows << " x " << cols << " \t"
<< (timerB.value() * REPEAT) / repeats << "s "
<< "(" << 1e-6 * size*repeats/timerB.value() << " MFLOPS)\t";
std::cout << rows << " x " << cols << " \t" << (timerB.value() * REPEAT) / repeats << "s "
<< "(" << 1e-6 * size * repeats / timerB.value() << " MFLOPS)\t";
std::cout << "\n";
// make sure the compiler does not optimize too much
if (acc==123)
std::cout << acc;
if (acc == 123) std::cout << acc;
}
int main(int argc, char* argv[])
{
const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,0};
int main(int argc, char* argv[]) {
const int dynsizes[] = {4, 6, 8, 16, 24, 32, 49, 64, 128, 256, 512, 900, 0};
std::cout << "size no sqrt standard";
// #ifdef BENCH_GSL
// std::cout << " GSL (standard + double + ATLAS) ";
// #endif
// #ifdef BENCH_GSL
// std::cout << " GSL (standard + double + ATLAS) ";
// #endif
std::cout << "\n";
for (uint i=0; dynsizes[i]>0; ++i)
{
bench_reverse(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
bench_reverse(Matrix<Scalar,Dynamic,1>(dynsizes[i]*dynsizes[i]));
for (uint i = 0; dynsizes[i] > 0; ++i) {
bench_reverse(Matrix<Scalar, Dynamic, Dynamic>(dynsizes[i], dynsizes[i]));
bench_reverse(Matrix<Scalar, Dynamic, 1>(dynsizes[i] * dynsizes[i]));
}
// bench_reverse(Matrix<Scalar,2,2>());
// bench_reverse(Matrix<Scalar,3,3>());
// bench_reverse(Matrix<Scalar,4,4>());
// bench_reverse(Matrix<Scalar,5,5>());
// bench_reverse(Matrix<Scalar,6,6>());
// bench_reverse(Matrix<Scalar,7,7>());
// bench_reverse(Matrix<Scalar,8,8>());
// bench_reverse(Matrix<Scalar,12,12>());
// bench_reverse(Matrix<Scalar,16,16>());
// bench_reverse(Matrix<Scalar,2,2>());
// bench_reverse(Matrix<Scalar,3,3>());
// bench_reverse(Matrix<Scalar,4,4>());
// bench_reverse(Matrix<Scalar,5,5>());
// bench_reverse(Matrix<Scalar,6,6>());
// bench_reverse(Matrix<Scalar,7,7>());
// bench_reverse(Matrix<Scalar,8,8>());
// bench_reverse(Matrix<Scalar,12,12>());
// bench_reverse(Matrix<Scalar,16,16>());
return 0;
}

View File

@ -3,15 +3,13 @@
using namespace Eigen;
using namespace std;
int main()
{
typedef Matrix<SCALAR,Eigen::Dynamic,1> Vec;
int main() {
typedef Matrix<SCALAR, Eigen::Dynamic, 1> Vec;
Vec v(SIZE);
v.setZero();
v[0] = 1;
v[1] = 2;
for(int i = 0; i < 1000000; i++)
{
for (int i = 0; i < 1000000; i++) {
v.coeffRef(0) += v.sum() * SCALAR(1e-20);
}
cout << v.sum() << endl;

View File

@ -59,14 +59,12 @@ static_assert(maxsize > minsize, "maxsize must be larger than minsize");
static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)");
// just a helper to store a triple of K,M,N sizes for matrix product
struct size_triple_t
{
struct size_triple_t {
size_t k, m, n;
size_triple_t() : k(0), m(0), n(0) {}
size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}
size_triple_t(uint16_t compact)
{
size_triple_t(uint16_t compact) {
k = 1 << ((compact & 0xf00) >> 8);
m = 1 << ((compact & 0x0f0) >> 4);
n = 1 << ((compact & 0x00f) >> 0);
@ -82,50 +80,35 @@ uint8_t log2_pot(size_t x) {
// Convert between size triples and a compact form fitting in 12 bits
// where each size, which must be a POT, is encoded as its log2, on 4 bits
// so the largest representable size is 2^15 == 32k ... big enough.
// Packs three sizes into one 16-bit value: log2_pot(k) is shifted into bits
// 8 and up, log2_pot(m) into bits 4 and up, and log2_pot(n) occupies the
// low bits. size_triple_t's uint16_t constructor decodes the same layout with
// the masks 0xf00 / 0x0f0 / 0x00f.
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
{
uint16_t compact_size_triple(size_t k, size_t m, size_t n) {
return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
}
// Convenience overload: packs the k, m and n members of `t` by forwarding to
// the three-argument compact_size_triple().
uint16_t compact_size_triple(const size_triple_t& t)
{
return compact_size_triple(t.k, t.m, t.n);
}
uint16_t compact_size_triple(const size_triple_t& t) { return compact_size_triple(t.k, t.m, t.n); }
// A single benchmark. Initially only contains benchmark params.
// Then call run(), which stores the result in the gflops field.
// Parameters and result of one matrix-product benchmark.
//
//   compact_product_size    product sizes, packed by compact_size_triple()
//   compact_block_size      blocking sizes, packed the same way (0 when the
//                           default block size is requested)
//   use_default_block_size  true when no explicit blocking sizes were given
//   gflops                  result field, initialised to 0 by every constructor
//
// Constructors:
//   ()                        all fields zero / false
//   (pk, pm, pn, bk, bm, bn)  explicit product and block sizes
//   (pk, pm, pn)              product sizes only; selects the default block size
struct benchmark_t
{
struct benchmark_t {
uint16_t compact_product_size;
uint16_t compact_block_size;
bool use_default_block_size;
float gflops;
benchmark_t()
: compact_product_size(0)
, compact_block_size(0)
, use_default_block_size(false)
, gflops(0)
{
}
benchmark_t(size_t pk, size_t pm, size_t pn,
size_t bk, size_t bm, size_t bn)
: compact_product_size(compact_size_triple(pk, pm, pn))
, compact_block_size(compact_size_triple(bk, bm, bn))
, use_default_block_size(false)
, gflops(0)
{}
benchmark_t() : compact_product_size(0), compact_block_size(0), use_default_block_size(false), gflops(0) {}
benchmark_t(size_t pk, size_t pm, size_t pn, size_t bk, size_t bm, size_t bn)
: compact_product_size(compact_size_triple(pk, pm, pn)),
compact_block_size(compact_size_triple(bk, bm, bn)),
use_default_block_size(false),
gflops(0) {}
benchmark_t(size_t pk, size_t pm, size_t pn)
: compact_product_size(compact_size_triple(pk, pm, pn))
, compact_block_size(0)
, use_default_block_size(true)
, gflops(0)
{}
: compact_product_size(compact_size_triple(pk, pm, pn)),
compact_block_size(0),
use_default_block_size(true),
gflops(0) {}
void run();
};
ostream& operator<<(ostream& s, const benchmark_t& b)
{
ostream& operator<<(ostream& s, const benchmark_t& b) {
s << hex << b.compact_product_size << dec;
if (b.use_default_block_size) {
size_triple_t t(b.compact_product_size);
@ -141,17 +124,14 @@ ostream& operator<<(ostream& s, const benchmark_t& b)
// We sort first by increasing benchmark parameters,
// then by decreasing performance.
// Ordering used when sorting benchmarks. Keys, in order of precedence:
//   1. compact_product_size, ascending
//   2. compact_block_size, ascending
//   3. gflops, DESCENDING (note the `>`), so that among entries with equal
//      parameters the fastest one sorts first.
bool operator<(const benchmark_t& b1, const benchmark_t& b2)
{
bool operator<(const benchmark_t& b1, const benchmark_t& b2) {
return b1.compact_product_size < b2.compact_product_size ||
(b1.compact_product_size == b2.compact_product_size && (
(b1.compact_block_size < b2.compact_block_size || (
b1.compact_block_size == b2.compact_block_size &&
b1.gflops > b2.gflops))));
(b1.compact_product_size == b2.compact_product_size &&
((b1.compact_block_size < b2.compact_block_size ||
(b1.compact_block_size == b2.compact_block_size && b1.gflops > b2.gflops))));
}
void benchmark_t::run()
{
void benchmark_t::run() {
size_triple_t productsizes(compact_product_size);
if (use_default_block_size) {
@ -168,26 +148,22 @@ void benchmark_t::run()
// set up the matrix pool
const size_t combined_three_matrices_sizes =
sizeof(Scalar) *
(productsizes.k * productsizes.m +
productsizes.k * productsizes.n +
productsizes.m * productsizes.n);
sizeof(Scalar) *
(productsizes.k * productsizes.m + productsizes.k * productsizes.n + productsizes.m * productsizes.n);
// 64 M is large enough that nobody has a cache bigger than that,
// while still being small enough that everybody has this much RAM,
// so conveniently we don't need to special-case platforms here.
const size_t unlikely_large_cache_size = 64 << 20;
const size_t working_set_size =
min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
const size_t working_set_size = min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
const size_t matrix_pool_size =
1 + working_set_size / combined_three_matrices_sizes;
const size_t matrix_pool_size = 1 + working_set_size / combined_three_matrices_sizes;
MatrixType* lhs = new MatrixType[matrix_pool_size];
MatrixType* rhs = new MatrixType[matrix_pool_size];
MatrixType* dst = new MatrixType[matrix_pool_size];
MatrixType *lhs = new MatrixType[matrix_pool_size];
MatrixType *rhs = new MatrixType[matrix_pool_size];
MatrixType *dst = new MatrixType[matrix_pool_size];
for (size_t i = 0; i < matrix_pool_size; i++) {
lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k);
rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n);
@ -200,7 +176,6 @@ void benchmark_t::run()
float time_per_iter = 0.0f;
size_t matrix_index = 0;
while (true) {
double starttime = timer.getCpuTime();
for (int i = 0; i < iters_at_a_time; i++) {
dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
@ -228,8 +203,7 @@ void benchmark_t::run()
gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter;
}
void print_cpuinfo()
{
void print_cpuinfo() {
#ifdef __linux__
cout << "contents of /proc/cpuinfo:" << endl;
string line;
@ -249,33 +223,30 @@ void print_cpuinfo()
}
// Returns a printable name for the type T. This primary template is the
// fallback and returns "unknown"; explicit specializations supply real names.
template <typename T>
string type_name()
{
string type_name() {
return "unknown";
}
// Specialization of type_name for float: returns "float".
template<>
string type_name<float>()
{
template <>
string type_name<float>() {
return "float";
}
// Specialization of type_name for double: returns "double".
template<>
string type_name<double>()
{
template <>
string type_name<double>() {
return "double";
}
// Polymorphic base for the actions this program can perform.
// Both virtual functions have default bodies that call abort(), so a derived
// action is expected to override them; calling either one on an object that
// does not override it terminates the program.
//   invokation_name()  the default aborts (the `return nullptr` after abort()
//                      is never reached)
//   run()              the default aborts
// The destructor is virtual so actions can be destroyed through a base pointer.
struct action_t
{
virtual const char* invokation_name() const { abort(); return nullptr; }
struct action_t {
virtual const char* invokation_name() const {
abort();
return nullptr;
}
virtual void run() const { abort(); }
virtual ~action_t() {}
};
void show_usage_and_exit(int /*argc*/, char* argv[],
const vector<unique_ptr<action_t>>& available_actions)
{
void show_usage_and_exit(int /*argc*/, char* argv[], const vector<unique_ptr<action_t>>& available_actions) {
cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl;
cerr << "available actions:" << endl << endl;
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
@ -293,11 +264,10 @@ void show_usage_and_exit(int /*argc*/, char* argv[],
cerr << " avoid warm caches." << endl;
exit(1);
}
float measure_clock_speed()
{
float measure_clock_speed() {
cerr << "Measuring clock speed... \r" << flush;
vector<float> all_gflops;
for (int i = 0; i < 8; i++) {
benchmark_t b(1024, 1024, 1024);
@ -315,14 +285,12 @@ float measure_clock_speed()
return result;
}
// Thin wrapper around a duration expressed in whole seconds. It exists as a
// distinct type so that a dedicated operator<< overload can be selected for it.
// The constructor is not `explicit`, so an int converts implicitly.
struct human_duration_t
{
struct human_duration_t {
int seconds;
human_duration_t(int s) : seconds(s) {}
};
ostream& operator<<(ostream& s, const human_duration_t& d)
{
ostream& operator<<(ostream& s, const human_duration_t& d) {
int remainder = d.seconds;
if (remainder > 3600) {
int hours = remainder / 3600;
@ -342,8 +310,7 @@ ostream& operator<<(ostream& s, const human_duration_t& d)
const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data";
void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run)
{
void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run) {
FILE* file = fopen(filename, "w");
if (!file) {
cerr << "Could not open file " << filename << " for writing." << endl;
@ -358,8 +325,7 @@ void serialize_benchmarks(const char* filename, const vector<benchmark_t>& bench
fclose(file);
}
bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run)
{
bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run) {
FILE* file = fopen(filename, "r");
if (!file) {
return false;
@ -382,11 +348,7 @@ bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmark
return true;
}
void try_run_some_benchmarks(
vector<benchmark_t>& benchmarks,
double time_start,
size_t& first_benchmark_to_run)
{
void try_run_some_benchmarks(vector<benchmark_t>& benchmarks, double time_start, size_t& first_benchmark_to_run) {
if (first_benchmark_to_run == benchmarks.size()) {
return;
}
@ -402,9 +364,7 @@ void try_run_some_benchmarks(
time_now = timer.getRealTime();
// We check clock speed every minute and at the end.
if (benchmark_index == benchmarks.size() ||
time_now > time_last_clock_speed_measurement + 60.0f)
{
if (benchmark_index == benchmarks.size() || time_now > time_last_clock_speed_measurement + 60.0f) {
time_last_clock_speed_measurement = time_now;
// Ensure that clock speed is as expected
@ -425,8 +385,7 @@ void try_run_some_benchmarks(
// which invalidates all benchmark results collected so far.
// Either way, we better restart all over again now.
if (benchmark_index) {
cerr << "Restarting at " << 100.0f * ratio_done
<< " % because clock speed increased. " << endl;
cerr << "Restarting at " << 100.0f * ratio_done << " % because clock speed increased. " << endl;
}
max_clock_speed = current_clock_speed;
first_benchmark_to_run = 0;
@ -436,12 +395,9 @@ void try_run_some_benchmarks(
bool rerun_last_tests = false;
if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
cerr << "Measurements completed so far: "
<< 100.0f * ratio_done
<< " % " << endl;
cerr << "Clock speed seems to be only "
<< current_clock_speed/max_clock_speed
<< " times what it used to be." << endl;
cerr << "Measurements completed so far: " << 100.0f * ratio_done << " % " << endl;
cerr << "Clock speed seems to be only " << current_clock_speed / max_clock_speed << " times what it used to be."
<< endl;
unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
@ -454,9 +410,8 @@ void try_run_some_benchmarks(
exit(2);
}
rerun_last_tests = true;
cerr << "Sleeping "
<< seconds_to_sleep_if_lower_clock_speed
<< " s... \r" << endl;
cerr << "Sleeping " << seconds_to_sleep_if_lower_clock_speed << " s... \r"
<< endl;
sleep(seconds_to_sleep_if_lower_clock_speed);
current_clock_speed = measure_clock_speed();
seconds_to_sleep_if_lower_clock_speed *= 2;
@ -464,8 +419,7 @@ void try_run_some_benchmarks(
}
if (rerun_last_tests) {
cerr << "Redoing the last "
<< 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
cerr << "Redoing the last " << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
<< " % because clock speed had been low. " << endl;
return;
}
@ -486,8 +440,7 @@ void try_run_some_benchmarks(
// Display progress info on stderr
if (time_now > time_last_progress_update + 1.0f) {
time_last_progress_update = time_now;
cerr << "Measurements... " << 100.0f * ratio_done
<< " %, ETA "
cerr << "Measurements... " << 100.0f * ratio_done << " %, ETA "
<< human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done)
<< " \r" << flush;
}
@ -498,19 +451,15 @@ void try_run_some_benchmarks(
}
}
void run_benchmarks(vector<benchmark_t>& benchmarks)
{
void run_benchmarks(vector<benchmark_t>& benchmarks) {
size_t first_benchmark_to_run;
vector<benchmark_t> deserialized_benchmarks;
bool use_deserialized_benchmarks = false;
if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) {
cerr << "Found serialized session with "
<< 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
cerr << "Found serialized session with " << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
<< " % already done" << endl;
if (deserialized_benchmarks.size() == benchmarks.size() &&
first_benchmark_to_run > 0 &&
first_benchmark_to_run < benchmarks.size())
{
if (deserialized_benchmarks.size() == benchmarks.size() && first_benchmark_to_run > 0 &&
first_benchmark_to_run < benchmarks.size()) {
use_deserialized_benchmarks = true;
}
}
@ -531,15 +480,13 @@ void run_benchmarks(vector<benchmark_t>& benchmarks)
for (int i = 0; i < 4; i++) {
max_clock_speed = max(max_clock_speed, measure_clock_speed());
}
double time_start = 0.0;
while (first_benchmark_to_run < benchmarks.size()) {
if (first_benchmark_to_run == 0) {
time_start = timer.getRealTime();
}
try_run_some_benchmarks(benchmarks,
time_start,
first_benchmark_to_run);
try_run_some_benchmarks(benchmarks, time_start, first_benchmark_to_run);
}
// Sort timings by increasing benchmark parameters, and decreasing gflops.
@ -550,10 +497,8 @@ void run_benchmarks(vector<benchmark_t>& benchmarks)
// Collect best (i.e. now first) results for each parameter values.
vector<benchmark_t> best_benchmarks;
for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
if (best_benchmarks.empty() ||
best_benchmarks.back().compact_product_size != it->compact_product_size ||
best_benchmarks.back().compact_block_size != it->compact_block_size)
{
if (best_benchmarks.empty() || best_benchmarks.back().compact_product_size != it->compact_product_size ||
best_benchmarks.back().compact_block_size != it->compact_block_size) {
best_benchmarks.push_back(*it);
}
}
@ -562,11 +507,9 @@ void run_benchmarks(vector<benchmark_t>& benchmarks)
benchmarks = best_benchmarks;
}
struct measure_all_pot_sizes_action_t : action_t
{
struct measure_all_pot_sizes_action_t : action_t {
virtual const char* invokation_name() const { return "all-pot-sizes"; }
virtual void run() const
{
virtual void run() const {
vector<benchmark_t> benchmarks;
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
@ -593,11 +536,9 @@ struct measure_all_pot_sizes_action_t : action_t
}
};
struct measure_default_sizes_action_t : action_t
{
struct measure_default_sizes_action_t : action_t {
virtual const char* invokation_name() const { return "default-sizes"; }
virtual void run() const
{
virtual void run() const {
vector<benchmark_t> benchmarks;
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
@ -618,8 +559,7 @@ struct measure_default_sizes_action_t : action_t
}
};
int main(int argc, char* argv[])
{
int main(int argc, char* argv[]) {
double time_start = timer.getRealTime();
cout.precision(4);
cerr.precision(4);
@ -647,7 +587,7 @@ int main(int argc, char* argv[])
for (int i = 2; i < argc; i++) {
if (argv[i] == strstr(argv[i], "--min-working-set-size=")) {
const char* equals_sign = strchr(argv[i], '=');
min_working_set_size = strtoul(equals_sign+1, nullptr, 10);
min_working_set_size = strtoul(equals_sign + 1, nullptr, 10);
} else {
cerr << "unrecognized option: " << argv[i] << endl << endl;
show_usage_and_exit(argc, argv, available_actions);
@ -657,7 +597,7 @@ int main(int argc, char* argv[])
print_cpuinfo();
cout << "benchmark parameters:" << endl;
cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl;
cout << "pointer size: " << 8 * sizeof(void*) << " bits" << endl;
cout << "scalar type: " << type_name<Scalar>() << endl;
cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl;
cout << "minsize = " << minsize << endl;

View File

@ -19,21 +19,18 @@ using namespace Eigen;
#define SCALAR double
#endif
int main(int argc, char *argv[])
{
Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones();
Matrix<SCALAR,MATSIZE,MATSIZE> m;
for(int i = 0; i < MATSIZE; i++)
for(int j = 0; j < MATSIZE; j++)
{
m(i,j) = (i+MATSIZE*j);
}
asm("#begin");
for(int a = 0; a < REPEAT; a++)
{
m = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones() + 0.00005 * (m + (m*m));
int main(int argc, char *argv[]) {
Matrix<SCALAR, MATSIZE, MATSIZE> I = Matrix<SCALAR, MATSIZE, MATSIZE>::Ones();
Matrix<SCALAR, MATSIZE, MATSIZE> m;
for (int i = 0; i < MATSIZE; i++)
for (int j = 0; j < MATSIZE; j++) {
m(i, j) = (i + MATSIZE * j);
}
asm("#end");
cout << m << endl;
return 0;
asm("#begin");
for (int a = 0; a < REPEAT; a++) {
m = Matrix<SCALAR, MATSIZE, MATSIZE>::Ones() + 0.00005 * (m + (m * m));
}
asm("#end");
cout << m << endl;
return 0;
}

View File

@ -15,23 +15,21 @@ using namespace Eigen;
#define SCALAR float
#endif
int main(int argc, char *argv[])
{
int main(int argc, char *argv[]) {
typedef Matrix<SCALAR, Eigen::Dynamic, Eigen::Dynamic> Mat;
Mat m(100, 100);
m.setRandom();
for(int a = 0; a < REPEAT; a++)
{
for (int a = 0; a < REPEAT; a++) {
int r, c, nr, nc;
r = Eigen::internal::random<int>(0,10);
c = Eigen::internal::random<int>(0,10);
nr = Eigen::internal::random<int>(50,80);
nc = Eigen::internal::random<int>(50,80);
m.block(r,c,nr,nc) += Mat::Ones(nr,nc);
m.block(r,c,nr,nc) *= SCALAR(10);
m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10);
m.block(r,c,nr,nc) /= SCALAR(10);
r = Eigen::internal::random<int>(0, 10);
c = Eigen::internal::random<int>(0, 10);
nr = Eigen::internal::random<int>(50, 80);
nc = Eigen::internal::random<int>(50, 80);
m.block(r, c, nr, nc) += Mat::Ones(nr, nc);
m.block(r, c, nr, nc) *= SCALAR(10);
m.block(r, c, nr, nc) -= Mat::constant(nr, nc, 10);
m.block(r, c, nr, nc) /= SCALAR(10);
}
cout << m[0] << endl;
return 0;

View File

@ -19,18 +19,16 @@ using namespace Eigen;
#define REPEAT 100
#endif
int main(int argc, char *argv[])
{
MATTYPE I = MATTYPE::Ones(MATSIZE,MATSIZE);
MATTYPE m(MATSIZE,MATSIZE);
for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
{
m(i,j) = (i+j+1)/(MATSIZE*MATSIZE);
}
for(int a = 0; a < REPEAT; a++)
{
m = I + 0.0001 * (m + m*m);
}
cout << m(0,0) << endl;
return 0;
int main(int argc, char *argv[]) {
MATTYPE I = MATTYPE::Ones(MATSIZE, MATSIZE);
MATTYPE m(MATSIZE, MATSIZE);
for (int i = 0; i < MATSIZE; i++)
for (int j = 0; j < MATSIZE; j++) {
m(i, j) = (i + j + 1) / (MATSIZE * MATSIZE);
}
for (int a = 0; a < REPEAT; a++) {
m = I + 0.0001 * (m + m * m);
}
cout << m(0, 0) << endl;
return 0;
}

View File

@ -18,18 +18,15 @@ using namespace Eigen;
#define REPEAT 1000
#endif
int main(int argc, char *argv[])
{
VECTYPE I = VECTYPE::Ones(VECSIZE);
VECTYPE m(VECSIZE,1);
for(int i = 0; i < VECSIZE; i++)
{
m[i] = 0.1 * i/VECSIZE;
}
for(int a = 0; a < REPEAT; a++)
{
m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4);
}
cout << m[0] << endl;
return 0;
int main(int argc, char *argv[]) {
VECTYPE I = VECTYPE::Ones(VECSIZE);
VECTYPE m(VECSIZE, 1);
for (int i = 0; i < VECSIZE; i++) {
m[i] = 0.1 * i / VECSIZE;
}
for (int a = 0; a < REPEAT; a++) {
m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m / 4);
}
cout << m[0] << endl;
return 0;
}

View File

@ -28,101 +28,80 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_aat_product {
public :
public:
// Ctor
Action_aat_product( int size ):_size(size)
{
Action_aat_product(int size) : _size(size) {
MESSAGE("Action_aat_product Ctor");
// STL matrix and vector initialization
init_matrix<pseudo_random>(A_stl,_size);
init_matrix<null_function>(X_stl,_size);
init_matrix<null_function>(resu_stl,_size);
init_matrix<pseudo_random>(A_stl, _size);
init_matrix<null_function>(X_stl, _size);
init_matrix<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::matrix_from_stl(A_ref,A_stl);
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(A_ref, A_stl);
Interface::matrix_from_stl(X_ref, X_stl);
Interface::matrix_from_stl(A, A_stl);
Interface::matrix_from_stl(X, X_stl);
}
// invalidate copy ctor
Action_aat_product( const Action_aat_product & )
{
Action_aat_product(const Action_aat_product&) {
INFOS("illegal call to Action_aat_product Copy Ctor");
exit(0);
}
// Dtor
~Action_aat_product( void ){
~Action_aat_product(void) {
MESSAGE("Action_aat_product Dtor");
// deallocation
Interface::free_matrix(A,_size);
Interface::free_matrix(X,_size);
Interface::free_matrix(A_ref,_size);
Interface::free_matrix(X_ref,_size);
Interface::free_matrix(A, _size);
Interface::free_matrix(X, _size);
Interface::free_matrix(A_ref, _size);
Interface::free_matrix(X_ref, _size);
}
// action name
static inline std::string name( void )
{
return "aat_"+Interface::name();
static inline std::string name(void) { return "aat_" + Interface::name(); }
double nb_op_base(void) { return double(_size) * double(_size) * double(_size); }
inline void initialize(void) {
Interface::copy_matrix(A_ref, A, _size);
Interface::copy_matrix(X_ref, X, _size);
}
double nb_op_base( void ){
return double(_size)*double(_size)*double(_size);
}
inline void calculate(void) { Interface::aat_product(A, X, _size); }
inline void initialize( void ){
Interface::copy_matrix(A_ref,A,_size);
Interface::copy_matrix(X_ref,X,_size);
}
inline void calculate( void ) {
Interface::aat_product(A,X,_size);
}
void check_result( void ){
if (_size>128) return;
void check_result(void) {
if (_size > 128) return;
// calculation check
Interface::matrix_to_stl(X,resu_stl);
Interface::matrix_to_stl(X, resu_stl);
STL_interface<typename Interface::real_type>::aat_product(A_stl,X_stl,_size);
STL_interface<typename Interface::real_type>::aat_product(A_stl, X_stl, _size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
if (error>1.e-6){
if (error > 1.e-6) {
INFOS("WRONG CALCULATION...residual=" << error);
exit(1);
}
}
private :
private:
typename Interface::stl_matrix A_stl;
typename Interface::stl_matrix X_stl;
typename Interface::stl_matrix resu_stl;
@ -133,13 +112,7 @@ private :
typename Interface::gene_matrix A;
typename Interface::gene_matrix X;
int _size;
};
#endif

View File

@ -28,101 +28,80 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_ata_product {
public :
public:
// Ctor
Action_ata_product( int size ):_size(size)
{
Action_ata_product(int size) : _size(size) {
MESSAGE("Action_ata_product Ctor");
// STL matrix and vector initialization
init_matrix<pseudo_random>(A_stl,_size);
init_matrix<null_function>(X_stl,_size);
init_matrix<null_function>(resu_stl,_size);
init_matrix<pseudo_random>(A_stl, _size);
init_matrix<null_function>(X_stl, _size);
init_matrix<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::matrix_from_stl(A_ref,A_stl);
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(A_ref, A_stl);
Interface::matrix_from_stl(X_ref, X_stl);
Interface::matrix_from_stl(A, A_stl);
Interface::matrix_from_stl(X, X_stl);
}
// invalidate copy ctor
Action_ata_product( const Action_ata_product & )
{
Action_ata_product(const Action_ata_product&) {
INFOS("illegal call to Action_ata_product Copy Ctor");
exit(0);
}
// Dtor
~Action_ata_product( void ){
~Action_ata_product(void) {
MESSAGE("Action_ata_product Dtor");
// deallocation
Interface::free_matrix(A,_size);
Interface::free_matrix(X,_size);
Interface::free_matrix(A_ref,_size);
Interface::free_matrix(X_ref,_size);
Interface::free_matrix(A, _size);
Interface::free_matrix(X, _size);
Interface::free_matrix(A_ref, _size);
Interface::free_matrix(X_ref, _size);
}
// action name
static inline std::string name( void )
{
return "ata_"+Interface::name();
static inline std::string name(void) { return "ata_" + Interface::name(); }
double nb_op_base(void) { return 2.0 * _size * _size * _size; }
inline void initialize(void) {
Interface::copy_matrix(A_ref, A, _size);
Interface::copy_matrix(X_ref, X, _size);
}
double nb_op_base( void ){
return 2.0*_size*_size*_size;
}
inline void calculate(void) { Interface::ata_product(A, X, _size); }
inline void initialize( void ){
Interface::copy_matrix(A_ref,A,_size);
Interface::copy_matrix(X_ref,X,_size);
}
inline void calculate( void ) {
Interface::ata_product(A,X,_size);
}
void check_result( void ){
if (_size>128) return;
void check_result(void) {
if (_size > 128) return;
// calculation check
Interface::matrix_to_stl(X,resu_stl);
Interface::matrix_to_stl(X, resu_stl);
STL_interface<typename Interface::real_type>::ata_product(A_stl,X_stl,_size);
STL_interface<typename Interface::real_type>::ata_product(A_stl, X_stl, _size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
if (error>1.e-6){
if (error > 1.e-6) {
INFOS("WRONG CALCULATION...residual=" << error);
exit(1);
}
}
private :
private:
typename Interface::stl_matrix A_stl;
typename Interface::stl_matrix X_stl;
typename Interface::stl_matrix resu_stl;
@ -133,13 +112,7 @@ private :
typename Interface::gene_matrix A;
typename Interface::gene_matrix X;
int _size;
};
#endif

View File

@ -28,87 +28,79 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_atv_product {
public :
Action_atv_product( int size ) : _size(size)
{
public:
Action_atv_product(int size) : _size(size) {
MESSAGE("Action_atv_product Ctor");
// STL matrix and vector initialization
init_matrix<pseudo_random>(A_stl,_size);
init_vector<pseudo_random>(B_stl,_size);
init_vector<null_function>(X_stl,_size);
init_vector<null_function>(resu_stl,_size);
init_matrix<pseudo_random>(A_stl, _size);
init_vector<pseudo_random>(B_stl, _size);
init_vector<null_function>(X_stl, _size);
init_vector<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::matrix_from_stl(A_ref,A_stl);
Interface::vector_from_stl(B_ref,B_stl);
Interface::vector_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(A_ref, A_stl);
Interface::vector_from_stl(B_ref, B_stl);
Interface::vector_from_stl(X_ref, X_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::vector_from_stl(B,B_stl);
Interface::vector_from_stl(X,X_stl);
Interface::matrix_from_stl(A, A_stl);
Interface::vector_from_stl(B, B_stl);
Interface::vector_from_stl(X, X_stl);
}
// invalidate copy ctor
Action_atv_product( const Action_atv_product & )
{
Action_atv_product(const Action_atv_product&) {
INFOS("illegal call to Action_atv_product Copy Ctor");
exit(1);
}
~Action_atv_product( void )
{
~Action_atv_product(void) {
MESSAGE("Action_atv_product Dtor");
Interface::free_matrix(A,_size);
Interface::free_matrix(A, _size);
Interface::free_vector(B);
Interface::free_vector(X);
Interface::free_matrix(A_ref,_size);
Interface::free_matrix(A_ref, _size);
Interface::free_vector(B_ref);
Interface::free_vector(X_ref);
}
static inline std::string name() { return "atv_" + Interface::name(); }
double nb_op_base( void ) { return 2.0*_size*_size; }
double nb_op_base(void) { return 2.0 * _size * _size; }
inline void initialize( void ){
Interface::copy_matrix(A_ref,A,_size);
Interface::copy_vector(B_ref,B,_size);
Interface::copy_vector(X_ref,X,_size);
inline void initialize(void) {
Interface::copy_matrix(A_ref, A, _size);
Interface::copy_vector(B_ref, B, _size);
Interface::copy_vector(X_ref, X, _size);
}
BTL_DONT_INLINE void calculate( void ) {
BTL_DONT_INLINE void calculate(void) {
BTL_ASM_COMMENT("begin atv");
Interface::atv_product(A,B,X,_size);
Interface::atv_product(A, B, X, _size);
BTL_ASM_COMMENT("end atv");
}
void check_result( void )
{
if (_size>128) return;
Interface::vector_to_stl(X,resu_stl);
void check_result(void) {
if (_size > 128) return;
Interface::vector_to_stl(X, resu_stl);
STL_interface<typename Interface::real_type>::atv_product(A_stl,B_stl,X_stl,_size);
STL_interface<typename Interface::real_type>::atv_product(A_stl, B_stl, X_stl, _size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
if (error>1.e-6){
if (error > 1.e-6) {
INFOS("WRONG CALCULATION...residual=" << error);
exit(1);
}
}
private :
private:
typename Interface::stl_matrix A_stl;
typename Interface::stl_vector B_stl;
typename Interface::stl_vector X_stl;
@ -122,13 +114,7 @@ private :
typename Interface::gene_vector B;
typename Interface::gene_vector X;
int _size;
};
#endif

View File

@ -27,38 +27,34 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_axpby {
public :
public:
// Ctor
Action_axpby( int size ):_alpha(0.5),_beta(0.95),_size(size)
{
Action_axpby(int size) : _alpha(0.5), _beta(0.95), _size(size) {
MESSAGE("Action_axpby Ctor");
// STL vector initialization
init_vector<pseudo_random>(X_stl,_size);
init_vector<pseudo_random>(Y_stl,_size);
init_vector<null_function>(resu_stl,_size);
init_vector<pseudo_random>(X_stl, _size);
init_vector<pseudo_random>(Y_stl, _size);
init_vector<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::vector_from_stl(X_ref,X_stl);
Interface::vector_from_stl(Y_ref,Y_stl);
Interface::vector_from_stl(X_ref, X_stl);
Interface::vector_from_stl(Y_ref, Y_stl);
Interface::vector_from_stl(X,X_stl);
Interface::vector_from_stl(Y,Y_stl);
Interface::vector_from_stl(X, X_stl);
Interface::vector_from_stl(Y, Y_stl);
}
// invalidate copy ctor
Action_axpby( const Action_axpby & )
{
Action_axpby(const Action_axpby&) {
INFOS("illegal call to Action_axpby Copy Ctor");
exit(1);
}
// Dtor
~Action_axpby( void ){
~Action_axpby(void) {
MESSAGE("Action_axpby Dtor");
// deallocation
@ -70,44 +66,37 @@ public :
}
// action name
static inline std::string name( void )
{
return "axpby_"+Interface::name();
static inline std::string name(void) { return "axpby_" + Interface::name(); }
double nb_op_base(void) { return 3.0 * _size; }
inline void initialize(void) {
Interface::copy_vector(X_ref, X, _size);
Interface::copy_vector(Y_ref, Y, _size);
}
double nb_op_base( void ){
return 3.0*_size;
}
inline void initialize( void ){
Interface::copy_vector(X_ref,X,_size);
Interface::copy_vector(Y_ref,Y,_size);
}
inline void calculate( void ) {
inline void calculate(void) {
BTL_ASM_COMMENT("mybegin axpby");
Interface::axpby(_alpha,X,_beta,Y,_size);
Interface::axpby(_alpha, X, _beta, Y, _size);
BTL_ASM_COMMENT("myend axpby");
}
void check_result( void ){
if (_size>128) return;
void check_result(void) {
if (_size > 128) return;
// calculation check
Interface::vector_to_stl(Y,resu_stl);
Interface::vector_to_stl(Y, resu_stl);
STL_interface<typename Interface::real_type>::axpby(_alpha,X_stl,_beta,Y_stl,_size);
STL_interface<typename Interface::real_type>::axpby(_alpha, X_stl, _beta, Y_stl, _size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Y_stl, resu_stl);
if (error>1.e-6){
if (error > 1.e-6) {
INFOS("WRONG CALCULATION...residual=" << error);
exit(2);
}
}
private :
private:
typename Interface::stl_vector X_stl;
typename Interface::stl_vector Y_stl;
typename Interface::stl_vector resu_stl;

View File

@ -28,46 +28,39 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_axpy {
public :
public:
// Ctor
Action_axpy( int size ):_coef(1.0),_size(size)
{
Action_axpy(int size) : _coef(1.0), _size(size) {
MESSAGE("Action_axpy Ctor");
// STL vector initialization
init_vector<pseudo_random>(X_stl,_size);
init_vector<pseudo_random>(Y_stl,_size);
init_vector<null_function>(resu_stl,_size);
init_vector<pseudo_random>(X_stl, _size);
init_vector<pseudo_random>(Y_stl, _size);
init_vector<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::vector_from_stl(X_ref,X_stl);
Interface::vector_from_stl(Y_ref,Y_stl);
Interface::vector_from_stl(X,X_stl);
Interface::vector_from_stl(Y,Y_stl);
Interface::vector_from_stl(X_ref, X_stl);
Interface::vector_from_stl(Y_ref, Y_stl);
Interface::vector_from_stl(X, X_stl);
Interface::vector_from_stl(Y, Y_stl);
}
// invalidate copy ctor
Action_axpy( const Action_axpy & )
{
Action_axpy(const Action_axpy&) {
INFOS("illegal call to Action_axpy Copy Ctor");
exit(1);
}
// Dtor
~Action_axpy( void ){
~Action_axpy(void) {
MESSAGE("Action_axpy Dtor");
// deallocation
@ -81,46 +74,38 @@ public :
// action name
static inline std::string name( void )
{
return "axpy_"+Interface::name();
static inline std::string name(void) { return "axpy_" + Interface::name(); }
double nb_op_base(void) { return 2.0 * _size; }
inline void initialize(void) {
Interface::copy_vector(X_ref, X, _size);
Interface::copy_vector(Y_ref, Y, _size);
}
double nb_op_base( void ){
return 2.0*_size;
}
inline void initialize( void ){
Interface::copy_vector(X_ref,X,_size);
Interface::copy_vector(Y_ref,Y,_size);
}
inline void calculate( void ) {
inline void calculate(void) {
BTL_ASM_COMMENT("mybegin axpy");
Interface::axpy(_coef,X,Y,_size);
Interface::axpy(_coef, X, Y, _size);
BTL_ASM_COMMENT("myend axpy");
}
void check_result( void ){
if (_size>128) return;
void check_result(void) {
if (_size > 128) return;
// calculation check
Interface::vector_to_stl(Y,resu_stl);
Interface::vector_to_stl(Y, resu_stl);
STL_interface<typename Interface::real_type>::axpy(_coef,X_stl,Y_stl,_size);
STL_interface<typename Interface::real_type>::axpy(_coef, X_stl, Y_stl, _size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Y_stl, resu_stl);
if (error>1.e-6){
if (error > 1.e-6) {
INFOS("WRONG CALCULATION...residual=" << error);
exit(0);
}
}
private :
private:
typename Interface::stl_vector X_stl;
typename Interface::stl_vector Y_stl;
typename Interface::stl_vector resu_stl;

View File

@ -27,93 +27,75 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_cholesky {
public :
public:
// Ctor
Action_cholesky( int size ):_size(size)
{
Action_cholesky(int size) : _size(size) {
MESSAGE("Action_cholesky Ctor");
// STL mat/vec initialization
init_matrix_symm<pseudo_random>(X_stl,_size);
init_matrix<null_function>(C_stl,_size);
init_matrix_symm<pseudo_random>(X_stl, _size);
init_matrix<null_function>(C_stl, _size);
// make sure X is invertible
for (int i=0; i<_size; ++i)
X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100;
for (int i = 0; i < _size; ++i) X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100;
// generic matrix and vector initialization
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(C,C_stl);
Interface::matrix_from_stl(X_ref, X_stl);
Interface::matrix_from_stl(X, X_stl);
Interface::matrix_from_stl(C, C_stl);
_cost = 0;
for (int j=0; j<_size; ++j)
{
double r = std::max(_size - j -1,0);
_cost += 2*(r*j+r+j);
for (int j = 0; j < _size; ++j) {
double r = std::max(_size - j - 1, 0);
_cost += 2 * (r * j + r + j);
}
}
// invalidate copy ctor
Action_cholesky( const Action_cholesky & )
{
Action_cholesky(const Action_cholesky&) {
INFOS("illegal call to Action_cholesky Copy Ctor");
exit(1);
}
// Dtor
~Action_cholesky( void ){
~Action_cholesky(void) {
MESSAGE("Action_cholesky Dtor");
// deallocation
Interface::free_matrix(X_ref,_size);
Interface::free_matrix(X,_size);
Interface::free_matrix(C,_size);
Interface::free_matrix(X_ref, _size);
Interface::free_matrix(X, _size);
Interface::free_matrix(C, _size);
}
// action name
static inline std::string name( void )
{
return "cholesky_"+Interface::name();
}
static inline std::string name(void) { return "cholesky_" + Interface::name(); }
double nb_op_base( void ){
return _cost;
}
double nb_op_base(void) { return _cost; }
inline void initialize( void ){
Interface::copy_matrix(X_ref,X,_size);
}
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
inline void calculate( void ) {
Interface::cholesky(X,C,_size);
}
inline void calculate(void) { Interface::cholesky(X, C, _size); }
void check_result( void ){
void check_result(void) {
// calculation check
// STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
// STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
}
private :
private:
typename Interface::stl_matrix X_stl;
typename Interface::stl_matrix C_stl;

View File

@ -23,91 +23,78 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_ger {
public :
public:
// Ctor
BTL_DONT_INLINE Action_ger( int size ):_size(size)
{
BTL_DONT_INLINE Action_ger(int size) : _size(size) {
MESSAGE("Action_ger Ctor");
// STL matrix and vector initialization
typename Interface::stl_matrix tmp;
init_matrix<pseudo_random>(A_stl,_size);
init_vector<pseudo_random>(B_stl,_size);
init_vector<pseudo_random>(X_stl,_size);
init_vector<null_function>(resu_stl,_size);
init_matrix<pseudo_random>(A_stl, _size);
init_vector<pseudo_random>(B_stl, _size);
init_vector<pseudo_random>(X_stl, _size);
init_vector<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::matrix_from_stl(A_ref,A_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::vector_from_stl(B_ref,B_stl);
Interface::vector_from_stl(B,B_stl);
Interface::vector_from_stl(X_ref,X_stl);
Interface::vector_from_stl(X,X_stl);
Interface::matrix_from_stl(A_ref, A_stl);
Interface::matrix_from_stl(A, A_stl);
Interface::vector_from_stl(B_ref, B_stl);
Interface::vector_from_stl(B, B_stl);
Interface::vector_from_stl(X_ref, X_stl);
Interface::vector_from_stl(X, X_stl);
}
// invalidate copy ctor
Action_ger( const Action_ger & )
{
Action_ger(const Action_ger&) {
INFOS("illegal call to Action_ger Copy Ctor");
exit(1);
}
// Dtor
BTL_DONT_INLINE ~Action_ger( void ){
BTL_DONT_INLINE ~Action_ger(void) {
MESSAGE("Action_ger Dtor");
Interface::free_matrix(A,_size);
Interface::free_matrix(A, _size);
Interface::free_vector(B);
Interface::free_vector(X);
Interface::free_matrix(A_ref,_size);
Interface::free_matrix(A_ref, _size);
Interface::free_vector(B_ref);
Interface::free_vector(X_ref);
}
// action name
static inline std::string name( void )
{
return "ger_" + Interface::name();
static inline std::string name(void) { return "ger_" + Interface::name(); }
double nb_op_base(void) { return 2.0 * _size * _size; }
BTL_DONT_INLINE void initialize(void) {
Interface::copy_matrix(A_ref, A, _size);
Interface::copy_vector(B_ref, B, _size);
Interface::copy_vector(X_ref, X, _size);
}
double nb_op_base( void ){
return 2.0*_size*_size;
}
BTL_DONT_INLINE void initialize( void ){
Interface::copy_matrix(A_ref,A,_size);
Interface::copy_vector(B_ref,B,_size);
Interface::copy_vector(X_ref,X,_size);
}
BTL_DONT_INLINE void calculate( void ) {
BTL_DONT_INLINE void calculate(void) {
BTL_ASM_COMMENT("#begin ger");
Interface::ger(A,B,X,_size);
Interface::ger(A, B, X, _size);
BTL_ASM_COMMENT("end ger");
}
BTL_DONT_INLINE void check_result( void ){
BTL_DONT_INLINE void check_result(void) {
// calculation check
Interface::vector_to_stl(X,resu_stl);
Interface::vector_to_stl(X, resu_stl);
STL_interface<typename Interface::real_type>::ger(A_stl,B_stl,X_stl,_size);
STL_interface<typename Interface::real_type>::ger(A_stl, B_stl, X_stl, _size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
if (error>1.e-3){
if (error > 1.e-3) {
INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// exit(0);
}
}
private :
private:
typename Interface::stl_matrix A_stl;
typename Interface::stl_vector B_stl;
typename Interface::stl_vector X_stl;
@ -124,5 +111,4 @@ private :
int _size;
};
#endif

View File

@ -27,94 +27,77 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_hessenberg {
public :
public:
// Ctor
Action_hessenberg( int size ):_size(size)
{
Action_hessenberg(int size) : _size(size) {
MESSAGE("Action_hessenberg Ctor");
// STL vector initialization
init_matrix<pseudo_random>(X_stl,_size);
init_matrix<pseudo_random>(X_stl, _size);
init_matrix<null_function>(C_stl,_size);
init_matrix<null_function>(resu_stl,_size);
init_matrix<null_function>(C_stl, _size);
init_matrix<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(C,C_stl);
Interface::matrix_from_stl(X_ref, X_stl);
Interface::matrix_from_stl(X, X_stl);
Interface::matrix_from_stl(C, C_stl);
_cost = 0;
for (int j=0; j<_size-2; ++j)
{
double r = std::max(0,_size-j-1);
double b = std::max(0,_size-j-2);
_cost += 6 + 3*b + r*r*4 + r*_size*4;
for (int j = 0; j < _size - 2; ++j) {
double r = std::max(0, _size - j - 1);
double b = std::max(0, _size - j - 2);
_cost += 6 + 3 * b + r * r * 4 + r * _size * 4;
}
}
// invalidate copy ctor
Action_hessenberg( const Action_hessenberg & )
{
Action_hessenberg(const Action_hessenberg&) {
INFOS("illegal call to Action_hessenberg Copy Ctor");
exit(1);
}
// Dtor
~Action_hessenberg( void ){
~Action_hessenberg(void) {
MESSAGE("Action_hessenberg Dtor");
// deallocation
Interface::free_matrix(X_ref,_size);
Interface::free_matrix(X,_size);
Interface::free_matrix(C,_size);
Interface::free_matrix(X_ref, _size);
Interface::free_matrix(X, _size);
Interface::free_matrix(C, _size);
}
// action name
static inline std::string name( void )
{
return "hessenberg_"+Interface::name();
}
static inline std::string name(void) { return "hessenberg_" + Interface::name(); }
double nb_op_base( void ){
return _cost;
}
double nb_op_base(void) { return _cost; }
inline void initialize( void ){
Interface::copy_matrix(X_ref,X,_size);
}
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
inline void calculate( void ) {
Interface::hessenberg(X,C,_size);
}
inline void calculate(void) { Interface::hessenberg(X, C, _size); }
void check_result( void ){
void check_result(void) {
// calculation check
Interface::matrix_to_stl(C,resu_stl);
// STL_interface<typename Interface::real_type>::hessenberg(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
Interface::matrix_to_stl(C, resu_stl);
// STL_interface<typename Interface::real_type>::hessenberg(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
}
private :
private:
typename Interface::stl_matrix X_stl;
typename Interface::stl_matrix C_stl;
typename Interface::stl_matrix resu_stl;
@ -127,97 +110,81 @@ private :
double _cost;
};
template<class Interface>
template <class Interface>
class Action_tridiagonalization {
public :
public:
// Ctor
Action_tridiagonalization( int size ):_size(size)
{
Action_tridiagonalization(int size) : _size(size) {
MESSAGE("Action_tridiagonalization Ctor");
// STL vector initialization
init_matrix<pseudo_random>(X_stl,_size);
for(int i=0; i<_size; ++i)
{
for(int j=0; j<i; ++j)
X_stl[i][j] = X_stl[j][i];
init_matrix<pseudo_random>(X_stl, _size);
for (int i = 0; i < _size; ++i) {
for (int j = 0; j < i; ++j) X_stl[i][j] = X_stl[j][i];
}
init_matrix<null_function>(C_stl,_size);
init_matrix<null_function>(resu_stl,_size);
init_matrix<null_function>(C_stl, _size);
init_matrix<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(C,C_stl);
Interface::matrix_from_stl(X_ref, X_stl);
Interface::matrix_from_stl(X, X_stl);
Interface::matrix_from_stl(C, C_stl);
_cost = 0;
for (int j=0; j<_size-2; ++j)
{
double r = std::max(0,_size-j-1);
double b = std::max(0,_size-j-2);
_cost += 6. + 3.*b + r*r*8.;
for (int j = 0; j < _size - 2; ++j) {
double r = std::max(0, _size - j - 1);
double b = std::max(0, _size - j - 2);
_cost += 6. + 3. * b + r * r * 8.;
}
}
// invalidate copy ctor
Action_tridiagonalization( const Action_tridiagonalization & )
{
// Copy constructor that must never be used: it reports the call through
// INFOS and terminates the process with exit(1).
Action_tridiagonalization(const Action_tridiagonalization&) {
INFOS("illegal call to Action_tridiagonalization Copy Ctor");
exit(1);
}
// Dtor
~Action_tridiagonalization( void ){
~Action_tridiagonalization(void) {
MESSAGE("Action_tridiagonalization Dtor");
// deallocation
Interface::free_matrix(X_ref,_size);
Interface::free_matrix(X,_size);
Interface::free_matrix(C,_size);
Interface::free_matrix(X_ref, _size);
Interface::free_matrix(X, _size);
Interface::free_matrix(C, _size);
}
// action name
static inline std::string name( void ) { return "tridiagonalization_"+Interface::name(); }
// Returns the action's name: the prefix "tridiagonalization_" followed by Interface::name().
static inline std::string name(void) { return "tridiagonalization_" + Interface::name(); }
double nb_op_base( void ){
return _cost;
}
// Returns _cost, the value the constructor accumulates over j in [0, _size - 2).
double nb_op_base(void) { return _cost; }
inline void initialize( void ){
Interface::copy_matrix(X_ref,X,_size);
}
// Calls Interface::copy_matrix(X_ref, X, _size). Presumably this resets the
// working matrix X from X_ref before a run; the copy direction is defined by
// Interface::copy_matrix -- TODO confirm.
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
inline void calculate( void ) {
Interface::tridiagonalization(X,C,_size);
}
// Forwards X, C and _size to Interface::tridiagonalization (presumably the
// kernel being timed; the timing harness is not visible here).
inline void calculate(void) { Interface::tridiagonalization(X, C, _size); }
void check_result( void ){
void check_result(void) {
// calculation check
Interface::matrix_to_stl(C,resu_stl);
// STL_interface<typename Interface::real_type>::tridiagonalization(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
Interface::matrix_to_stl(C, resu_stl);
// STL_interface<typename Interface::real_type>::tridiagonalization(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
}
private :
private:
typename Interface::stl_matrix X_stl;
typename Interface::stl_matrix C_stl;
typename Interface::stl_matrix resu_stl;

View File

@ -27,88 +27,72 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_lu_decomp {
public :
public:
// Ctor
Action_lu_decomp( int size ):_size(size)
{
Action_lu_decomp(int size) : _size(size) {
MESSAGE("Action_lu_decomp Ctor");
// STL vector initialization
init_matrix<pseudo_random>(X_stl,_size);
init_matrix<pseudo_random>(X_stl, _size);
init_matrix<null_function>(C_stl,_size);
init_matrix<null_function>(resu_stl,_size);
init_matrix<null_function>(C_stl, _size);
init_matrix<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(C,C_stl);
Interface::matrix_from_stl(X_ref, X_stl);
Interface::matrix_from_stl(X, X_stl);
Interface::matrix_from_stl(C, C_stl);
_cost = 2.0*size*size*size/3.0 + size*size;
_cost = 2.0 * size * size * size / 3.0 + size * size;
}
// invalidate copy ctor
Action_lu_decomp( const Action_lu_decomp & )
{
// Copy constructor that must never be used: it reports the call through
// INFOS and terminates the process with exit(1).
Action_lu_decomp(const Action_lu_decomp&) {
INFOS("illegal call to Action_lu_decomp Copy Ctor");
exit(1);
}
// Dtor
~Action_lu_decomp( void ){
~Action_lu_decomp(void) {
MESSAGE("Action_lu_decomp Dtor");
// deallocation
Interface::free_matrix(X_ref,_size);
Interface::free_matrix(X,_size);
Interface::free_matrix(C,_size);
Interface::free_matrix(X_ref, _size);
Interface::free_matrix(X, _size);
Interface::free_matrix(C, _size);
}
// action name
static inline std::string name( void )
{
return "complete_lu_decomp_"+Interface::name();
}
// Returns the action's name: the prefix "complete_lu_decomp_" followed by Interface::name().
static inline std::string name(void) { return "complete_lu_decomp_" + Interface::name(); }
double nb_op_base( void ){
return _cost;
}
// Returns _cost, which the constructor sets to 2*size^3/3 + size^2.
double nb_op_base(void) { return _cost; }
inline void initialize( void ){
Interface::copy_matrix(X_ref,X,_size);
}
// Calls Interface::copy_matrix(X_ref, X, _size). Presumably this resets the
// working matrix X from X_ref before a run; the copy direction is defined by
// Interface::copy_matrix -- TODO confirm.
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
inline void calculate( void ) {
Interface::lu_decomp(X,C,_size);
}
// Forwards X, C and _size to Interface::lu_decomp (presumably the kernel
// being timed; the timing harness is not visible here).
inline void calculate(void) { Interface::lu_decomp(X, C, _size); }
void check_result( void ){
void check_result(void) {
// calculation check
Interface::matrix_to_stl(C,resu_stl);
// STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
Interface::matrix_to_stl(C, resu_stl);
// STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
}
private :
private:
typename Interface::stl_matrix X_stl;
typename Interface::stl_matrix C_stl;
typename Interface::stl_matrix resu_stl;

View File

@ -1,14 +1,14 @@
//=====================================================
// File : action_lu_solve.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ -16,7 +16,7 @@
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
//
#ifndef ACTION_LU_SOLVE
#define ACTION_LU_SOLVE
#include "utilities.h"
@ -28,33 +28,25 @@
using namespace std;
template<class Interface>
class Action_lu_solve
{
template <class Interface>
class Action_lu_solve {
public:
// Returns the action's name: the prefix "lu_solve_" followed by Interface::name().
static inline std::string name(void) { return "lu_solve_" + Interface::name(); }
public :
static inline std::string name( void )
{
return "lu_solve_"+Interface::name();
}
static double nb_op_base(int size){
return 2.0*size*size*size/3.0; // questionable but not really important
// Returns 2*size^3/3, evaluated in double precision, as the operation count
// for a problem of dimension `size`. The inline note below already flags the
// figure as approximate.
static double nb_op_base(int size) {
return 2.0 * size * size * size / 3.0; // questionable but not really important
}
static double calculate( int nb_calc, int size ) {
static double calculate(int nb_calc, int size) {
// STL matrix and vector initialization
typename Interface::stl_matrix A_stl;
typename Interface::stl_vector B_stl;
typename Interface::stl_vector X_stl;
init_matrix<pseudo_random>(A_stl,size);
init_vector<pseudo_random>(B_stl,size);
init_vector<null_function>(X_stl,size);
init_matrix<pseudo_random>(A_stl, size);
init_vector<pseudo_random>(B_stl, size);
init_vector<null_function>(X_stl, size);
// generic matrix and vector initialization
@ -62,18 +54,18 @@ public :
typename Interface::gene_vector B;
typename Interface::gene_vector X;
typename Interface::gene_matrix LU;
typename Interface::gene_matrix LU;
Interface::matrix_from_stl(A, A_stl);
Interface::vector_from_stl(B, B_stl);
Interface::vector_from_stl(X, X_stl);
Interface::matrix_from_stl(LU, A_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::vector_from_stl(B,B_stl);
Interface::vector_from_stl(X,X_stl);
Interface::matrix_from_stl(LU,A_stl);
// local variable :
typename Interface::Pivot_Vector pivot; // pivot vector
Interface::new_Pivot_Vector(pivot,size);
typename Interface::Pivot_Vector pivot; // pivot vector
Interface::new_Pivot_Vector(pivot, size);
// timer utilities
Portable_Timer chronos;
@ -81,56 +73,48 @@ public :
// time measurement
chronos.start();
for (int ii=0;ii<nb_calc;ii++){
for (int ii = 0; ii < nb_calc; ii++) {
// LU factorization
Interface::copy_matrix(A,LU,size);
Interface::LU_factor(LU,pivot,size);
Interface::copy_matrix(A, LU, size);
Interface::LU_factor(LU, pivot, size);
// LU solve
Interface::LU_solve(LU,pivot,B,X,size);
Interface::LU_solve(LU, pivot, B, X, size);
}
// Time stop
chronos.stop();
double time=chronos.user_time();
double time = chronos.user_time();
// check result :
typename Interface::stl_vector B_new_stl(size);
Interface::vector_to_stl(X,X_stl);
Interface::vector_to_stl(X, X_stl);
STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl,X_stl,B_new_stl,size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(B_stl,B_new_stl);
if (error>1.e-5){
STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl, X_stl, B_new_stl, size);
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(B_stl, B_new_stl);
if (error > 1.e-5) {
INFOS("WRONG CALCULATION...residual=" << error);
STL_interface<typename Interface::real_type>::display_vector(B_stl);
STL_interface<typename Interface::real_type>::display_vector(B_new_stl);
exit(0);
}
// deallocation and return time
Interface::free_matrix(A,size);
Interface::free_matrix(A, size);
Interface::free_vector(B);
Interface::free_vector(X);
Interface::free_Pivot_Vector(pivot);
return time;
}
};
#endif

View File

@ -28,103 +28,83 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_matrix_matrix_product {
public :
public:
// Ctor
Action_matrix_matrix_product( int size ):_size(size)
{
Action_matrix_matrix_product(int size) : _size(size) {
MESSAGE("Action_matrix_matrix_product Ctor");
// STL matrix and vector initialization
init_matrix<pseudo_random>(A_stl,_size);
init_matrix<pseudo_random>(B_stl,_size);
init_matrix<null_function>(X_stl,_size);
init_matrix<null_function>(resu_stl,_size);
init_matrix<pseudo_random>(A_stl, _size);
init_matrix<pseudo_random>(B_stl, _size);
init_matrix<null_function>(X_stl, _size);
init_matrix<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::matrix_from_stl(A_ref,A_stl);
Interface::matrix_from_stl(B_ref,B_stl);
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::matrix_from_stl(B,B_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(A_ref, A_stl);
Interface::matrix_from_stl(B_ref, B_stl);
Interface::matrix_from_stl(X_ref, X_stl);
Interface::matrix_from_stl(A, A_stl);
Interface::matrix_from_stl(B, B_stl);
Interface::matrix_from_stl(X, X_stl);
}
// invalidate copy ctor
Action_matrix_matrix_product( const Action_matrix_matrix_product & )
{
// Copy constructor that must never be used: it reports the call through
// INFOS and terminates the process. The exit status is 1 (failure) so the
// illegal copy is not reported to the calling shell as a successful run;
// this matches the status used by the other Action_* copy constructors.
Action_matrix_matrix_product(const Action_matrix_matrix_product&) {
  INFOS("illegal call to Action_matrix_matrix_product Copy Ctor");
  exit(1);
}
// Dtor
~Action_matrix_matrix_product( void ){
~Action_matrix_matrix_product(void) {
MESSAGE("Action_matrix_matrix_product Dtor");
// deallocation
Interface::free_matrix(A,_size);
Interface::free_matrix(B,_size);
Interface::free_matrix(X,_size);
Interface::free_matrix(A_ref,_size);
Interface::free_matrix(B_ref,_size);
Interface::free_matrix(X_ref,_size);
Interface::free_matrix(A, _size);
Interface::free_matrix(B, _size);
Interface::free_matrix(X, _size);
Interface::free_matrix(A_ref, _size);
Interface::free_matrix(B_ref, _size);
Interface::free_matrix(X_ref, _size);
}
// action name
static inline std::string name( void )
{
return "matrix_matrix_"+Interface::name();
// Returns the action's name: the prefix "matrix_matrix_" followed by Interface::name().
static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); }
// Returns 2 * _size^3, evaluated in double precision.
double nb_op_base(void) { return 2.0 * _size * _size * _size; }
// Calls Interface::copy_matrix for each of the pairs (A_ref, A), (B_ref, B)
// and (X_ref, X). Presumably this resets the working matrices from their
// *_ref counterparts before a run; the copy direction is defined by
// Interface::copy_matrix -- TODO confirm.
inline void initialize(void) {
Interface::copy_matrix(A_ref, A, _size);
Interface::copy_matrix(B_ref, B, _size);
Interface::copy_matrix(X_ref, X, _size);
}
double nb_op_base( void ){
return 2.0*_size*_size*_size;
}
inline void initialize( void ){
Interface::copy_matrix(A_ref,A,_size);
Interface::copy_matrix(B_ref,B,_size);
Interface::copy_matrix(X_ref,X,_size);
}
inline void calculate( void ) {
Interface::matrix_matrix_product(A,B,X,_size);
}
void check_result( void ){
// Forwards A, B, X and _size to Interface::matrix_matrix_product (presumably
// the kernel being timed; the timing harness is not visible here).
inline void calculate(void) { Interface::matrix_matrix_product(A, B, X, _size); }
void check_result(void) {
// calculation check
if (_size<200)
{
Interface::matrix_to_stl(X,resu_stl);
STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
if (error>1.e-6){
if (_size < 200) {
Interface::matrix_to_stl(X, resu_stl);
STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl, B_stl, X_stl, _size);
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
if (error > 1.e-6) {
INFOS("WRONG CALCULATION...residual=" << error);
exit(1);
}
}
}
private :
private:
typename Interface::stl_matrix A_stl;
typename Interface::stl_matrix B_stl;
typename Interface::stl_matrix X_stl;
@ -138,13 +118,7 @@ private :
typename Interface::gene_matrix B;
typename Interface::gene_matrix X;
int _size;
};
#endif

View File

@ -29,31 +29,23 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_matrix_matrix_product_bis {
public:
// Returns the action's name: the prefix "matrix_matrix_" followed by Interface::name().
static inline std::string name(void) { return "matrix_matrix_" + Interface::name(); }
public :
static inline std::string name( void )
{
return "matrix_matrix_"+Interface::name();
}
static double nb_op_base(int size){
return 2.0*size*size*size;
}
static double calculate( int nb_calc, int size ) {
// Returns 2 * size^3, evaluated in double precision.
static double nb_op_base(int size) { return 2.0 * size * size * size; }
static double calculate(int nb_calc, int size) {
// STL matrix and vector initialization
typename Interface::stl_matrix A_stl;
typename Interface::stl_matrix B_stl;
typename Interface::stl_matrix X_stl;
init_matrix<pseudo_random>(A_stl,size);
init_matrix<pseudo_random>(B_stl,size);
init_matrix<null_function>(X_stl,size);
init_matrix<pseudo_random>(A_stl, size);
init_matrix<pseudo_random>(B_stl, size);
init_matrix<null_function>(X_stl, size);
// generic matrix and vector initialization
@ -65,15 +57,13 @@ public :
typename Interface::gene_matrix B;
typename Interface::gene_matrix X;
Interface::matrix_from_stl(A_ref, A_stl);
Interface::matrix_from_stl(B_ref, B_stl);
Interface::matrix_from_stl(X_ref, X_stl);
Interface::matrix_from_stl(A_ref,A_stl);
Interface::matrix_from_stl(B_ref,B_stl);
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::matrix_from_stl(B,B_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(A, A_stl);
Interface::matrix_from_stl(B, B_stl);
Interface::matrix_from_stl(X, X_stl);
// STL_timer utilities
@ -84,15 +74,12 @@ public :
chronos.start_baseline(nb_calc);
do {
Interface::copy_matrix(A_ref,A,size);
Interface::copy_matrix(B_ref,B,size);
Interface::copy_matrix(X_ref,X,size);
Interface::copy_matrix(A_ref, A, size);
Interface::copy_matrix(B_ref, B, size);
Interface::copy_matrix(X_ref, X, size);
// Interface::matrix_matrix_product(A,B,X,size); This line must be commented !!!!
}
while(chronos.check());
} while (chronos.check());
chronos.report(true);
@ -101,52 +88,44 @@ public :
chronos.start(nb_calc);
do {
Interface::copy_matrix(A_ref, A, size);
Interface::copy_matrix(B_ref, B, size);
Interface::copy_matrix(X_ref, X, size);
Interface::copy_matrix(A_ref,A,size);
Interface::copy_matrix(B_ref,B,size);
Interface::copy_matrix(X_ref,X,size);
Interface::matrix_matrix_product(A,B,X,size); // here it is not commented !!!!
}
while(chronos.check());
Interface::matrix_matrix_product(A, B, X, size); // here it is not commented !!!!
} while (chronos.check());
chronos.report(true);
double time=chronos.calculated_time/2000.0;
double time = chronos.calculated_time / 2000.0;
// calculation check
typename Interface::stl_matrix resu_stl(size);
Interface::matrix_to_stl(X,resu_stl);
Interface::matrix_to_stl(X, resu_stl);
STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,size);
STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl, B_stl, X_stl, size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);
if (error>1.e-6){
if (error > 1.e-6) {
INFOS("WRONG CALCULATION...residual=" << error);
exit(1);
}
// deallocation and return time
Interface::free_matrix(A,size);
Interface::free_matrix(B,size);
Interface::free_matrix(X,size);
Interface::free_matrix(A, size);
Interface::free_matrix(B, size);
Interface::free_matrix(X, size);
Interface::free_matrix(A_ref,size);
Interface::free_matrix(B_ref,size);
Interface::free_matrix(X_ref,size);
Interface::free_matrix(A_ref, size);
Interface::free_matrix(B_ref, size);
Interface::free_matrix(X_ref, size);
return time;
}
};
#endif

View File

@ -28,106 +28,88 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_matrix_vector_product {
public :
public:
// Ctor
BTL_DONT_INLINE Action_matrix_vector_product( int size ):_size(size)
{
BTL_DONT_INLINE Action_matrix_vector_product(int size) : _size(size) {
MESSAGE("Action_matrix_vector_product Ctor");
// STL matrix and vector initialization
init_matrix<pseudo_random>(A_stl,_size);
init_vector<pseudo_random>(B_stl,_size);
init_vector<null_function>(X_stl,_size);
init_vector<null_function>(resu_stl,_size);
init_matrix<pseudo_random>(A_stl, _size);
init_vector<pseudo_random>(B_stl, _size);
init_vector<null_function>(X_stl, _size);
init_vector<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::matrix_from_stl(A_ref,A_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::vector_from_stl(B_ref,B_stl);
Interface::vector_from_stl(B,B_stl);
Interface::vector_from_stl(X_ref,X_stl);
Interface::vector_from_stl(X,X_stl);
Interface::matrix_from_stl(A_ref, A_stl);
Interface::matrix_from_stl(A, A_stl);
Interface::vector_from_stl(B_ref, B_stl);
Interface::vector_from_stl(B, B_stl);
Interface::vector_from_stl(X_ref, X_stl);
Interface::vector_from_stl(X, X_stl);
}
// invalidate copy ctor
Action_matrix_vector_product( const Action_matrix_vector_product & )
{
// Copy constructor that must never be used: it reports the call through
// INFOS and terminates the process with exit(1).
Action_matrix_vector_product(const Action_matrix_vector_product&) {
INFOS("illegal call to Action_matrix_vector_product Copy Ctor");
exit(1);
}
// Dtor
BTL_DONT_INLINE ~Action_matrix_vector_product( void ){
BTL_DONT_INLINE ~Action_matrix_vector_product(void) {
MESSAGE("Action_matrix_vector_product Dtor");
// deallocation
Interface::free_matrix(A,_size);
Interface::free_matrix(A, _size);
Interface::free_vector(B);
Interface::free_vector(X);
Interface::free_matrix(A_ref,_size);
Interface::free_matrix(A_ref, _size);
Interface::free_vector(B_ref);
Interface::free_vector(X_ref);
}
// action name
static inline std::string name( void )
{
return "matrix_vector_" + Interface::name();
// Returns the action's name: the prefix "matrix_vector_" followed by Interface::name().
static inline std::string name(void) { return "matrix_vector_" + Interface::name(); }
// Returns 2 * _size^2, evaluated in double precision.
double nb_op_base(void) { return 2.0 * _size * _size; }
// Calls Interface::copy_matrix on (A_ref, A) and Interface::copy_vector on
// (B_ref, B) and (X_ref, X). Presumably this resets the working operands from
// their *_ref counterparts before a run; the copy direction is defined by the
// Interface helpers -- TODO confirm.
BTL_DONT_INLINE void initialize(void) {
Interface::copy_matrix(A_ref, A, _size);
Interface::copy_vector(B_ref, B, _size);
Interface::copy_vector(X_ref, X, _size);
}
double nb_op_base( void ){
return 2.0*_size*_size;
// Forwards A, B, X and _size to Interface::matrix_vector_product. The call is
// bracketed by BTL_ASM_COMMENT markers (presumably emitted into the generated
// assembly so the kernel can be located -- TODO confirm the macro's effect).
BTL_DONT_INLINE void calculate(void) {
BTL_ASM_COMMENT("#begin matrix_vector_product");
Interface::matrix_vector_product(A, B, X, _size);
BTL_ASM_COMMENT("end matrix_vector_product");
}
BTL_DONT_INLINE void initialize( void ){
Interface::copy_matrix(A_ref,A,_size);
Interface::copy_vector(B_ref,B,_size);
Interface::copy_vector(X_ref,X,_size);
}
BTL_DONT_INLINE void calculate( void ) {
BTL_ASM_COMMENT("#begin matrix_vector_product");
Interface::matrix_vector_product(A,B,X,_size);
BTL_ASM_COMMENT("end matrix_vector_product");
}
// Checks the computed result against the STL reference implementation.
//
// X is exported into resu_stl via Interface::vector_to_stl, the reference
// STL_interface::matrix_vector_product is run on A_stl, B_stl and X_stl, and
// the two vectors are compared with STL_interface::norm_diff. When the
// difference exceeds 1e-5 the residual is reported through INFOS and the
// process terminates.
BTL_DONT_INLINE void check_result(void) {
  // calculation check
  Interface::vector_to_stl(X, resu_stl);

  STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl, B_stl, X_stl, _size);

  typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(X_stl, resu_stl);

  if (error > 1.e-5) {
    INFOS("WRONG CALCULATION...residual=" << error);
    // A failed check is an error: exit with a non-zero status so that whatever
    // launched the benchmark does not see a wrong result as a successful run.
    exit(1);
  }
}
private :
private:
typename Interface::stl_matrix A_stl;
typename Interface::stl_vector B_stl;
typename Interface::stl_vector X_stl;
@ -141,13 +123,7 @@ private :
typename Interface::gene_vector B;
typename Interface::gene_vector X;
int _size;
};
#endif

View File

@ -27,90 +27,73 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_partial_lu {
public :
public:
// Ctor
Action_partial_lu( int size ):_size(size)
{
Action_partial_lu(int size) : _size(size) {
MESSAGE("Action_partial_lu Ctor");
// STL vector initialization
init_matrix<pseudo_random>(X_stl,_size);
init_matrix<null_function>(C_stl,_size);
init_matrix<pseudo_random>(X_stl, _size);
init_matrix<null_function>(C_stl, _size);
// make sure X is invertible
for (int i=0; i<_size; ++i)
X_stl[i][i] = X_stl[i][i] * 1e2 + 1;
for (int i = 0; i < _size; ++i) X_stl[i][i] = X_stl[i][i] * 1e2 + 1;
// generic matrix and vector initialization
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(C,C_stl);
Interface::matrix_from_stl(X_ref, X_stl);
Interface::matrix_from_stl(X, X_stl);
Interface::matrix_from_stl(C, C_stl);
_cost = 2.0*size*size*size/3.0 + size*size;
_cost = 2.0 * size * size * size / 3.0 + size * size;
}
// invalidate copy ctor
Action_partial_lu( const Action_partial_lu & )
{
// Copy constructor that must never be used: it reports the call through
// INFOS and terminates the process with exit(1).
Action_partial_lu(const Action_partial_lu&) {
INFOS("illegal call to Action_partial_lu Copy Ctor");
exit(1);
}
// Dtor
~Action_partial_lu( void ){
~Action_partial_lu(void) {
MESSAGE("Action_partial_lu Dtor");
// deallocation
Interface::free_matrix(X_ref,_size);
Interface::free_matrix(X,_size);
Interface::free_matrix(C,_size);
Interface::free_matrix(X_ref, _size);
Interface::free_matrix(X, _size);
Interface::free_matrix(C, _size);
}
// action name
static inline std::string name( void )
{
return "partial_lu_decomp_"+Interface::name();
}
// Returns the action's name: the prefix "partial_lu_decomp_" followed by Interface::name().
static inline std::string name(void) { return "partial_lu_decomp_" + Interface::name(); }
double nb_op_base( void ){
return _cost;
}
// Returns _cost, which the constructor sets to 2*size^3/3 + size^2.
double nb_op_base(void) { return _cost; }
inline void initialize( void ){
Interface::copy_matrix(X_ref,X,_size);
}
// Calls Interface::copy_matrix(X_ref, X, _size). Presumably this resets the
// working matrix X from X_ref before a run; the copy direction is defined by
// Interface::copy_matrix -- TODO confirm.
inline void initialize(void) { Interface::copy_matrix(X_ref, X, _size); }
inline void calculate( void ) {
Interface::partial_lu_decomp(X,C,_size);
}
// Forwards X, C and _size to Interface::partial_lu_decomp (presumably the
// kernel being timed; the timing harness is not visible here).
inline void calculate(void) { Interface::partial_lu_decomp(X, C, _size); }
void check_result( void ){
// Currently a no-op: the whole verification body below is commented out, so
// no result check is performed for this action.
void check_result(void) {
// calculation check
// Interface::matrix_to_stl(C,resu_stl);
// STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
// Interface::matrix_to_stl(C,resu_stl);
// STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
}
private :
private:
typename Interface::stl_matrix X_stl;
typename Interface::stl_matrix C_stl;

View File

@ -23,37 +23,33 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_rot {
public :
public:
// Ctor
BTL_DONT_INLINE Action_rot( int size ):_size(size)
{
BTL_DONT_INLINE Action_rot(int size) : _size(size) {
MESSAGE("Action_rot Ctor");
// STL matrix and vector initialization
typename Interface::stl_matrix tmp;
init_vector<pseudo_random>(A_stl,_size);
init_vector<pseudo_random>(B_stl,_size);
init_vector<pseudo_random>(A_stl, _size);
init_vector<pseudo_random>(B_stl, _size);
// generic matrix and vector initialization
Interface::vector_from_stl(A_ref,A_stl);
Interface::vector_from_stl(A,A_stl);
Interface::vector_from_stl(B_ref,B_stl);
Interface::vector_from_stl(B,B_stl);
Interface::vector_from_stl(A_ref, A_stl);
Interface::vector_from_stl(A, A_stl);
Interface::vector_from_stl(B_ref, B_stl);
Interface::vector_from_stl(B, B_stl);
}
// invalidate copy ctor
Action_rot( const Action_rot & )
{
// Copy constructor that must never be used: it reports the call through
// INFOS and terminates the process with exit(1).
Action_rot(const Action_rot&) {
INFOS("illegal call to Action_rot Copy Ctor");
exit(1);
}
// Dtor
BTL_DONT_INLINE ~Action_rot( void ){
BTL_DONT_INLINE ~Action_rot(void) {
MESSAGE("Action_rot Dtor");
Interface::free_vector(A);
Interface::free_vector(B);
@ -62,44 +58,37 @@ public :
}
// action name
static inline std::string name( void )
{
return "rot_" + Interface::name();
// Returns the action's name: the prefix "rot_" followed by Interface::name().
static inline std::string name(void) { return "rot_" + Interface::name(); }
// Returns 6 * _size, evaluated in double precision.
double nb_op_base(void) { return 6.0 * _size; }
// Calls Interface::copy_vector on (A_ref, A) and (B_ref, B). Presumably this
// resets the working vectors from their *_ref counterparts before a run; the
// copy direction is defined by Interface::copy_vector -- TODO confirm.
BTL_DONT_INLINE void initialize(void) {
Interface::copy_vector(A_ref, A, _size);
Interface::copy_vector(B_ref, B, _size);
}
double nb_op_base( void ){
return 6.0*_size;
}
BTL_DONT_INLINE void initialize( void ){
Interface::copy_vector(A_ref,A,_size);
Interface::copy_vector(B_ref,B,_size);
}
BTL_DONT_INLINE void calculate( void ) {
BTL_DONT_INLINE void calculate(void) {
BTL_ASM_COMMENT("#begin rot");
Interface::rot(A,B,0.5,0.6,_size);
Interface::rot(A, B, 0.5, 0.6, _size);
BTL_ASM_COMMENT("end rot");
}
BTL_DONT_INLINE void check_result( void ){
// Currently a no-op: the whole verification body below is commented out, so
// no result check is performed for this action.
BTL_DONT_INLINE void check_result(void) {
// calculation check
// Interface::vector_to_stl(X,resu_stl);
// Interface::vector_to_stl(X,resu_stl);
// STL_interface<typename Interface::real_type>::rot(A_stl,B_stl,X_stl,_size);
// STL_interface<typename Interface::real_type>::rot(A_stl,B_stl,X_stl,_size);
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
// if (error>1.e-3){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
// if (error>1.e-3){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
}
private :
private:
typename Interface::stl_vector A_stl;
typename Interface::stl_vector B_stl;
@ -112,5 +101,4 @@ private :
int _size;
};
#endif

View File

@ -28,95 +28,80 @@
using namespace std;
template<class Interface>
template <class Interface>
class Action_symv {
public :
public:
// Ctor
BTL_DONT_INLINE Action_symv( int size ):_size(size)
{
BTL_DONT_INLINE Action_symv(int size) : _size(size) {
MESSAGE("Action_symv Ctor");
// STL matrix and vector initialization
init_matrix_symm<pseudo_random>(A_stl,_size);
init_vector<pseudo_random>(B_stl,_size);
init_vector<null_function>(X_stl,_size);
init_vector<null_function>(resu_stl,_size);
init_matrix_symm<pseudo_random>(A_stl, _size);
init_vector<pseudo_random>(B_stl, _size);
init_vector<null_function>(X_stl, _size);
init_vector<null_function>(resu_stl, _size);
// generic matrix and vector initialization
Interface::matrix_from_stl(A_ref,A_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::vector_from_stl(B_ref,B_stl);
Interface::vector_from_stl(B,B_stl);
Interface::vector_from_stl(X_ref,X_stl);
Interface::vector_from_stl(X,X_stl);
Interface::matrix_from_stl(A_ref, A_stl);
Interface::matrix_from_stl(A, A_stl);
Interface::vector_from_stl(B_ref, B_stl);
Interface::vector_from_stl(B, B_stl);
Interface::vector_from_stl(X_ref, X_stl);
Interface::vector_from_stl(X, X_stl);
}
// invalidate copy ctor
Action_symv( const Action_symv & )
{
// Copy constructor that must never be used: it reports the call through
// INFOS and terminates the process with exit(1).
Action_symv(const Action_symv&) {
INFOS("illegal call to Action_symv Copy Ctor");
exit(1);
}
// Dtor
BTL_DONT_INLINE ~Action_symv( void ){
Interface::free_matrix(A,_size);
BTL_DONT_INLINE ~Action_symv(void) {
Interface::free_matrix(A, _size);
Interface::free_vector(B);
Interface::free_vector(X);
Interface::free_matrix(A_ref,_size);
Interface::free_matrix(A_ref, _size);
Interface::free_vector(B_ref);
Interface::free_vector(X_ref);
}
// action name
static inline std::string name( void )
{
return "symv_" + Interface::name();
// Returns the action's name: the prefix "symv_" followed by Interface::name().
static inline std::string name(void) { return "symv_" + Interface::name(); }
// Returns 2 * _size^2, evaluated in double precision.
double nb_op_base(void) { return 2.0 * _size * _size; }
// Calls Interface::copy_matrix on (A_ref, A) and Interface::copy_vector on
// (B_ref, B) and (X_ref, X). Presumably this resets the working operands from
// their *_ref counterparts before a run; the copy direction is defined by the
// Interface helpers -- TODO confirm.
BTL_DONT_INLINE void initialize(void) {
Interface::copy_matrix(A_ref, A, _size);
Interface::copy_vector(B_ref, B, _size);
Interface::copy_vector(X_ref, X, _size);
}
double nb_op_base( void ){
return 2.0*_size*_size;
BTL_DONT_INLINE void calculate(void) {