mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-23 10:09:36 +08:00
* updated benchmark files according to recent renamings
* various improvements in BTL including trisolver and cholesky bench
This commit is contained in:
parent
e9e5261664
commit
93115619c2
@ -29,6 +29,7 @@
|
|||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
namespace Eigen
|
namespace Eigen
|
||||||
{
|
{
|
||||||
@ -39,11 +40,11 @@ class BenchTimer
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
BenchTimer() : m_best(1e12) {}
|
BenchTimer() { reset(); }
|
||||||
|
|
||||||
~BenchTimer() {}
|
~BenchTimer() {}
|
||||||
|
|
||||||
inline void reset(void) {m_best = 1e12;}
|
inline void reset(void) {m_best = 1e6;}
|
||||||
inline void start(void) {m_start = getTime();}
|
inline void start(void) {m_start = getTime();}
|
||||||
inline void stop(void)
|
inline void stop(void)
|
||||||
{
|
{
|
||||||
|
@ -99,9 +99,9 @@ int main(int argc, char *argv[])
|
|||||||
|
|
||||||
Scalar alpha, beta;
|
Scalar alpha, beta;
|
||||||
MyMatrix ma(M,K), mb(K,N), mc(M,N);
|
MyMatrix ma(M,K), mb(K,N), mc(M,N);
|
||||||
ma = MyMatrix::random(M,K);
|
ma = MyMatrix::Random(M,K);
|
||||||
mb = MyMatrix::random(K,N);
|
mb = MyMatrix::Random(K,N);
|
||||||
mc = MyMatrix::random(M,N);
|
mc = MyMatrix::Random(M,N);
|
||||||
|
|
||||||
Eigen::BenchTimer timer;
|
Eigen::BenchTimer timer;
|
||||||
|
|
||||||
@ -132,9 +132,9 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
// clear
|
// clear
|
||||||
ma = MyMatrix::random(M,K);
|
ma = MyMatrix::Random(M,K);
|
||||||
mb = MyMatrix::random(K,N);
|
mb = MyMatrix::Random(K,N);
|
||||||
mc = MyMatrix::random(M,N);
|
mc = MyMatrix::Random(M,N);
|
||||||
|
|
||||||
// eigen
|
// eigen
|
||||||
// if (!(std::string(argv[1])=="auto"))
|
// if (!(std::string(argv[1])=="auto"))
|
||||||
@ -153,9 +153,9 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
// clear
|
// clear
|
||||||
ma = MyMatrix::random(M,K);
|
ma = MyMatrix::Random(M,K);
|
||||||
mb = MyMatrix::random(K,N);
|
mb = MyMatrix::Random(K,N);
|
||||||
mc = MyMatrix::random(M,N);
|
mc = MyMatrix::Random(M,N);
|
||||||
|
|
||||||
// eigen normal
|
// eigen normal
|
||||||
if (!(std::string(argv[1])=="auto"))
|
if (!(std::string(argv[1])=="auto"))
|
||||||
@ -193,11 +193,11 @@ void bench_eigengemm_normal(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb
|
|||||||
void check_product(int M, int N, int K)
|
void check_product(int M, int N, int K)
|
||||||
{
|
{
|
||||||
MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
|
MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
|
||||||
ma = MyMatrix::random(M,K);
|
ma = MyMatrix::Random(M,K);
|
||||||
mb = MyMatrix::random(K,N);
|
mb = MyMatrix::Random(K,N);
|
||||||
maT = ma.transpose();
|
maT = ma.transpose();
|
||||||
mbT = mb.transpose();
|
mbT = mb.transpose();
|
||||||
mc = MyMatrix::random(M,N);
|
mc = MyMatrix::Random(M,N);
|
||||||
|
|
||||||
MyMatrix::Scalar eps = 1e-4;
|
MyMatrix::Scalar eps = 1e-4;
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
|
|||||||
typedef typename MatrixType::Scalar Scalar;
|
typedef typename MatrixType::Scalar Scalar;
|
||||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
|
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
|
||||||
|
|
||||||
MatrixType a = MatrixType::random(rows,cols);
|
MatrixType a = MatrixType::Random(rows,cols);
|
||||||
SquareMatrixType covMat = a * a.adjoint();
|
SquareMatrixType covMat = a * a.adjoint();
|
||||||
|
|
||||||
BenchTimer timerSa, timerStd;
|
BenchTimer timerSa, timerStd;
|
||||||
|
@ -18,7 +18,7 @@ USING_PART_OF_NAMESPACE_EIGEN
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::ones();
|
Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones();
|
||||||
Matrix<SCALAR,MATSIZE,MATSIZE> m;
|
Matrix<SCALAR,MATSIZE,MATSIZE> m;
|
||||||
for(int i = 0; i < MATSIZE; i++)
|
for(int i = 0; i < MATSIZE; i++)
|
||||||
for(int j = 0; j < MATSIZE; j++)
|
for(int j = 0; j < MATSIZE; j++)
|
||||||
@ -28,7 +28,7 @@ int main(int argc, char *argv[])
|
|||||||
asm("#begin");
|
asm("#begin");
|
||||||
for(int a = 0; a < REPEAT; a++)
|
for(int a = 0; a < REPEAT; a++)
|
||||||
{
|
{
|
||||||
m = Matrix<SCALAR,MATSIZE,MATSIZE>::ones() + 0.00005 * (m + (m*m));
|
m = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones() + 0.00005 * (m + (m*m));
|
||||||
}
|
}
|
||||||
asm("#end");
|
asm("#end");
|
||||||
cout << m << endl;
|
cout << m << endl;
|
||||||
|
@ -26,7 +26,7 @@ int main(int argc, char *argv[])
|
|||||||
c = Eigen::ei_random<int>(0,10);
|
c = Eigen::ei_random<int>(0,10);
|
||||||
nr = Eigen::ei_random<int>(50,80);
|
nr = Eigen::ei_random<int>(50,80);
|
||||||
nc = Eigen::ei_random<int>(50,80);
|
nc = Eigen::ei_random<int>(50,80);
|
||||||
m.block(r,c,nr,nc) += Mat::ones(nr,nc);
|
m.block(r,c,nr,nc) += Mat::Ones(nr,nc);
|
||||||
m.block(r,c,nr,nc) *= SCALAR(10);
|
m.block(r,c,nr,nc) *= SCALAR(10);
|
||||||
m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10);
|
m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10);
|
||||||
m.block(r,c,nr,nc) /= SCALAR(10);
|
m.block(r,c,nr,nc) /= SCALAR(10);
|
||||||
|
@ -18,7 +18,7 @@ USING_PART_OF_NAMESPACE_EIGEN
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
MATTYPE I = MATTYPE::ones(MATSIZE,MATSIZE);
|
MATTYPE I = MATTYPE::Ones(MATSIZE,MATSIZE);
|
||||||
MATTYPE m(MATSIZE,MATSIZE);
|
MATTYPE m(MATSIZE,MATSIZE);
|
||||||
for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
|
for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
|
||||||
{
|
{
|
||||||
|
@ -19,7 +19,7 @@ USING_PART_OF_NAMESPACE_EIGEN
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
VECTYPE I = VECTYPE::ones(VECSIZE);
|
VECTYPE I = VECTYPE::Ones(VECSIZE);
|
||||||
VECTYPE m(VECSIZE,1);
|
VECTYPE m(VECSIZE,1);
|
||||||
for(int i = 0; i < VECSIZE; i++)
|
for(int i = 0; i < VECSIZE; i++)
|
||||||
{
|
{
|
||||||
@ -27,7 +27,7 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
for(int a = 0; a < REPEAT; a++)
|
for(int a = 0; a < REPEAT; a++)
|
||||||
{
|
{
|
||||||
m = VECTYPE::ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4);
|
m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4);
|
||||||
}
|
}
|
||||||
cout << m[0] << endl;
|
cout << m[0] << endl;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -26,6 +26,11 @@ include_directories(
|
|||||||
${PROJECT_SOURCE_DIR}/generic_bench/utils
|
${PROJECT_SOURCE_DIR}/generic_bench/utils
|
||||||
${PROJECT_SOURCE_DIR}/libs/STL)
|
${PROJECT_SOURCE_DIR}/libs/STL)
|
||||||
|
|
||||||
|
# find_package(MKL)
|
||||||
|
# if (MKL_FOUND)
|
||||||
|
# add_definitions(-DHAVE_MKL)
|
||||||
|
# set(DEFAULT_LIBRARIES ${MKL_LIBRARIES})
|
||||||
|
# endif (MKL_FOUND)
|
||||||
|
|
||||||
MACRO(BTL_ADD_BENCH targetname)
|
MACRO(BTL_ADD_BENCH targetname)
|
||||||
|
|
||||||
@ -49,6 +54,7 @@ MACRO(BTL_ADD_BENCH targetname)
|
|||||||
IF(BUILD_${targetname})
|
IF(BUILD_${targetname})
|
||||||
ADD_EXECUTABLE(${targetname} ${_sources})
|
ADD_EXECUTABLE(${targetname} ${_sources})
|
||||||
ADD_TEST(${targetname} "${targetname}")
|
ADD_TEST(${targetname} "${targetname}")
|
||||||
|
target_link_libraries(${targetname} ${DEFAULT_LIBRARIES})
|
||||||
ENDIF(BUILD_${targetname})
|
ENDIF(BUILD_${targetname})
|
||||||
|
|
||||||
ENDMACRO(BTL_ADD_BENCH)
|
ENDMACRO(BTL_ADD_BENCH)
|
||||||
|
@ -29,7 +29,18 @@ BTL uses cmake / ctest:
|
|||||||
|
|
||||||
$ ctest -V
|
$ ctest -V
|
||||||
|
|
||||||
You can also run a single bench, e.g.: ctest -V -R eigen
|
You can run the benchmarks only on libraries matching a given regular expression:
|
||||||
|
ctest -V -R <regexp>
|
||||||
|
For instance:
|
||||||
|
ctest -V -R eigen2
|
||||||
|
|
||||||
|
You can also select a given set of actions defining the environment variable BTL_CONFIG this way:
|
||||||
|
BTL_CONFIG="-a action1{:action2}*" ctest -V
|
||||||
|
An exemple:
|
||||||
|
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2
|
||||||
|
|
||||||
|
Finally, if bench results already exist (the bench*.dat files) then they merges by keeping the best for each matrix size. If you want to overwrite the previous ones you can simply add the "--overwrite" option:
|
||||||
|
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
|
||||||
|
|
||||||
4 : Analyze the result. different data files (.dat) are produced in each libs directories.
|
4 : Analyze the result. different data files (.dat) are produced in each libs directories.
|
||||||
If gnuplot is available, choose a directory name in the data directory to store the results and type
|
If gnuplot is available, choose a directory name in the data directory to store the results and type
|
||||||
|
124
bench/btl/actions/action_axpby.hh
Normal file
124
bench/btl/actions/action_axpby.hh
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
//=====================================================
|
||||||
|
// File : action_axpby.hh
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
|
//=====================================================
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License
|
||||||
|
// as published by the Free Software Foundation; either version 2
|
||||||
|
// of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
//
|
||||||
|
#ifndef ACTION_AXPBY
|
||||||
|
#define ACTION_AXPBY
|
||||||
|
#include "utilities.h"
|
||||||
|
#include "STL_interface.hh"
|
||||||
|
#include <string>
|
||||||
|
#include "init/init_function.hh"
|
||||||
|
#include "init/init_vector.hh"
|
||||||
|
#include "init/init_matrix.hh"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
template<class Interface>
|
||||||
|
class Action_axpby {
|
||||||
|
|
||||||
|
public :
|
||||||
|
|
||||||
|
// Ctor
|
||||||
|
Action_axpby( int size ):_size(size),_alpha(0.5),_beta(0.95)
|
||||||
|
{
|
||||||
|
MESSAGE("Action_axpby Ctor");
|
||||||
|
|
||||||
|
// STL vector initialization
|
||||||
|
init_vector<pseudo_random>(X_stl,_size);
|
||||||
|
init_vector<pseudo_random>(Y_stl,_size);
|
||||||
|
init_vector<null_function>(resu_stl,_size);
|
||||||
|
|
||||||
|
// generic matrix and vector initialization
|
||||||
|
Interface::vector_from_stl(X_ref,X_stl);
|
||||||
|
Interface::vector_from_stl(Y_ref,Y_stl);
|
||||||
|
|
||||||
|
Interface::vector_from_stl(X,X_stl);
|
||||||
|
Interface::vector_from_stl(Y,Y_stl);
|
||||||
|
}
|
||||||
|
|
||||||
|
// invalidate copy ctor
|
||||||
|
Action_axpby( const Action_axpby & )
|
||||||
|
{
|
||||||
|
INFOS("illegal call to Action_axpby Copy Ctor");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dtor
|
||||||
|
~Action_axpby( void ){
|
||||||
|
MESSAGE("Action_axpby Dtor");
|
||||||
|
|
||||||
|
// deallocation
|
||||||
|
Interface::free_vector(X_ref);
|
||||||
|
Interface::free_vector(Y_ref);
|
||||||
|
|
||||||
|
Interface::free_vector(X);
|
||||||
|
Interface::free_vector(Y);
|
||||||
|
}
|
||||||
|
|
||||||
|
// action name
|
||||||
|
static inline std::string name( void )
|
||||||
|
{
|
||||||
|
return "axpby_"+Interface::name();
|
||||||
|
}
|
||||||
|
|
||||||
|
double nb_op_base( void ){
|
||||||
|
return 3.0*_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void initialize( void ){
|
||||||
|
Interface::copy_vector(X_ref,X,_size);
|
||||||
|
Interface::copy_vector(Y_ref,Y,_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void calculate( void ) {
|
||||||
|
Interface::axpby(_alpha,X,_beta,Y,_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void check_result( void ){
|
||||||
|
// calculation check
|
||||||
|
Interface::vector_to_stl(Y,resu_stl);
|
||||||
|
|
||||||
|
STL_interface<typename Interface::real_type>::axpby(_alpha,X_stl,_beta,Y_stl,_size);
|
||||||
|
|
||||||
|
typename Interface::real_type error=
|
||||||
|
STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
|
||||||
|
|
||||||
|
if (error>1.e-6){
|
||||||
|
INFOS("WRONG CALCULATION...residual=" << error);
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private :
|
||||||
|
|
||||||
|
typename Interface::stl_vector X_stl;
|
||||||
|
typename Interface::stl_vector Y_stl;
|
||||||
|
typename Interface::stl_vector resu_stl;
|
||||||
|
|
||||||
|
typename Interface::gene_vector X_ref;
|
||||||
|
typename Interface::gene_vector Y_ref;
|
||||||
|
|
||||||
|
typename Interface::gene_vector X;
|
||||||
|
typename Interface::gene_vector Y;
|
||||||
|
|
||||||
|
typename Interface::real_type _alpha;
|
||||||
|
typename Interface::real_type _beta;
|
||||||
|
|
||||||
|
int _size;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
132
bench/btl/actions/action_cholesky.hh
Normal file
132
bench/btl/actions/action_cholesky.hh
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
//=====================================================
|
||||||
|
// File : action_cholesky.hh
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
|
//=====================================================
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License
|
||||||
|
// as published by the Free Software Foundation; either version 2
|
||||||
|
// of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
//
|
||||||
|
#ifndef ACTION_CHOLESKY
|
||||||
|
#define ACTION_CHOLESKY
|
||||||
|
#include "utilities.h"
|
||||||
|
#include "STL_interface.hh"
|
||||||
|
#include <string>
|
||||||
|
#include "init/init_function.hh"
|
||||||
|
#include "init/init_vector.hh"
|
||||||
|
#include "init/init_matrix.hh"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
template<class Interface>
|
||||||
|
class Action_cholesky {
|
||||||
|
|
||||||
|
public :
|
||||||
|
|
||||||
|
// Ctor
|
||||||
|
|
||||||
|
Action_cholesky( int size ):_size(size)
|
||||||
|
{
|
||||||
|
MESSAGE("Action_cholesky Ctor");
|
||||||
|
|
||||||
|
// STL vector initialization
|
||||||
|
typename Interface::stl_matrix tmp;
|
||||||
|
init_matrix<pseudo_random>(tmp,_size);
|
||||||
|
init_matrix<null_function>(X_stl,_size);
|
||||||
|
STL_interface<typename Interface::real_type>::ata_product(tmp,X_stl,_size);
|
||||||
|
|
||||||
|
init_matrix<null_function>(C_stl,_size);
|
||||||
|
init_matrix<null_function>(resu_stl,_size);
|
||||||
|
|
||||||
|
// generic matrix and vector initialization
|
||||||
|
Interface::matrix_from_stl(X_ref,X_stl);
|
||||||
|
Interface::matrix_from_stl(X,X_stl);
|
||||||
|
Interface::matrix_from_stl(C,C_stl);
|
||||||
|
|
||||||
|
_cost = 0;
|
||||||
|
for (int j=0; j<_size; ++j)
|
||||||
|
{
|
||||||
|
int r = std::max(_size - j -1,0);
|
||||||
|
_cost += 2*(r*j+r+j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// invalidate copy ctor
|
||||||
|
|
||||||
|
Action_cholesky( const Action_cholesky & )
|
||||||
|
{
|
||||||
|
INFOS("illegal call to Action_cholesky Copy Ctor");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dtor
|
||||||
|
|
||||||
|
~Action_cholesky( void ){
|
||||||
|
|
||||||
|
MESSAGE("Action_cholesky Dtor");
|
||||||
|
|
||||||
|
// deallocation
|
||||||
|
Interface::free_matrix(X_ref,_size);
|
||||||
|
Interface::free_matrix(X,_size);
|
||||||
|
Interface::free_matrix(C,_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// action name
|
||||||
|
|
||||||
|
static inline std::string name( void )
|
||||||
|
{
|
||||||
|
return "cholesky_"+Interface::name();
|
||||||
|
}
|
||||||
|
|
||||||
|
double nb_op_base( void ){
|
||||||
|
return _cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void initialize( void ){
|
||||||
|
Interface::copy_matrix(X_ref,X,_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void calculate( void ) {
|
||||||
|
Interface::cholesky(X,C,_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void check_result( void ){
|
||||||
|
// calculation check
|
||||||
|
Interface::matrix_to_stl(C,resu_stl);
|
||||||
|
|
||||||
|
// STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
|
||||||
|
//
|
||||||
|
// typename Interface::real_type error=
|
||||||
|
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
|
||||||
|
//
|
||||||
|
// if (error>1.e-6){
|
||||||
|
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||||
|
// exit(0);
|
||||||
|
// }
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private :
|
||||||
|
|
||||||
|
typename Interface::stl_matrix X_stl;
|
||||||
|
typename Interface::stl_matrix C_stl;
|
||||||
|
typename Interface::stl_matrix resu_stl;
|
||||||
|
|
||||||
|
typename Interface::gene_matrix X_ref;
|
||||||
|
typename Interface::gene_matrix X;
|
||||||
|
typename Interface::gene_matrix C;
|
||||||
|
|
||||||
|
int _size;
|
||||||
|
int _cost;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -49,11 +49,10 @@ public :
|
|||||||
// generic matrix and vector initialization
|
// generic matrix and vector initialization
|
||||||
|
|
||||||
Interface::matrix_from_stl(A_ref,A_stl);
|
Interface::matrix_from_stl(A_ref,A_stl);
|
||||||
Interface::vector_from_stl(B_ref,B_stl);
|
|
||||||
Interface::vector_from_stl(X_ref,X_stl);
|
|
||||||
|
|
||||||
Interface::matrix_from_stl(A,A_stl);
|
Interface::matrix_from_stl(A,A_stl);
|
||||||
|
Interface::vector_from_stl(B_ref,B_stl);
|
||||||
Interface::vector_from_stl(B,B_stl);
|
Interface::vector_from_stl(B,B_stl);
|
||||||
|
Interface::vector_from_stl(X_ref,X_stl);
|
||||||
Interface::vector_from_stl(X,X_stl);
|
Interface::vector_from_stl(X,X_stl);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
136
bench/btl/actions/action_trisolve.hh
Normal file
136
bench/btl/actions/action_trisolve.hh
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
//=====================================================
|
||||||
|
// File : action_trisolve.hh
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
|
//=====================================================
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License
|
||||||
|
// as published by the Free Software Foundation; either version 2
|
||||||
|
// of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
//
|
||||||
|
#ifndef ACTION_TRISOLVE
|
||||||
|
#define ACTION_TRISOLVE
|
||||||
|
#include "utilities.h"
|
||||||
|
#include "STL_interface.hh"
|
||||||
|
#include <string>
|
||||||
|
#include "init/init_function.hh"
|
||||||
|
#include "init/init_vector.hh"
|
||||||
|
#include "init/init_matrix.hh"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
template<class Interface>
|
||||||
|
class Action_trisolve {
|
||||||
|
|
||||||
|
public :
|
||||||
|
|
||||||
|
// Ctor
|
||||||
|
|
||||||
|
Action_trisolve( int size ):_size(size)
|
||||||
|
{
|
||||||
|
MESSAGE("Action_trisolve Ctor");
|
||||||
|
|
||||||
|
// STL vector initialization
|
||||||
|
init_matrix<pseudo_random>(L_stl,_size);
|
||||||
|
init_vector<pseudo_random>(B_stl,_size);
|
||||||
|
init_vector<null_function>(X_stl,_size);
|
||||||
|
for (int j=0; j<_size; ++j)
|
||||||
|
{
|
||||||
|
for (int i=0; i<j; ++i)
|
||||||
|
L_stl[j][i] = 0;
|
||||||
|
L_stl[j][j] += 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
init_vector<null_function>(resu_stl,_size);
|
||||||
|
|
||||||
|
// generic matrix and vector initialization
|
||||||
|
Interface::matrix_from_stl(L,L_stl);
|
||||||
|
Interface::vector_from_stl(X,X_stl);
|
||||||
|
Interface::vector_from_stl(B,B_stl);
|
||||||
|
|
||||||
|
_cost = 0;
|
||||||
|
for (int j=0; j<_size; ++j)
|
||||||
|
{
|
||||||
|
_cost += 2*j + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// invalidate copy ctor
|
||||||
|
|
||||||
|
Action_trisolve( const Action_trisolve & )
|
||||||
|
{
|
||||||
|
INFOS("illegal call to Action_trisolve Copy Ctor");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dtor
|
||||||
|
|
||||||
|
~Action_trisolve( void ){
|
||||||
|
|
||||||
|
MESSAGE("Action_trisolve Dtor");
|
||||||
|
|
||||||
|
// deallocation
|
||||||
|
Interface::free_matrix(L,_size);
|
||||||
|
Interface::free_vector(B);
|
||||||
|
Interface::free_vector(X);
|
||||||
|
}
|
||||||
|
|
||||||
|
// action name
|
||||||
|
|
||||||
|
static inline std::string name( void )
|
||||||
|
{
|
||||||
|
return "trisolve_"+Interface::name();
|
||||||
|
}
|
||||||
|
|
||||||
|
double nb_op_base( void ){
|
||||||
|
return _cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void initialize( void ){
|
||||||
|
//Interface::copy_vector(X_ref,X,_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void calculate( void ) {
|
||||||
|
Interface::trisolve_lower(L,B,X,_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void check_result( void ){
|
||||||
|
// calculation check
|
||||||
|
Interface::vector_to_stl(X,resu_stl);
|
||||||
|
|
||||||
|
STL_interface<typename Interface::real_type>::trisolve_lower(L_stl,B_stl,X_stl,_size);
|
||||||
|
|
||||||
|
typename Interface::real_type error=
|
||||||
|
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
|
||||||
|
|
||||||
|
if (error>1.e-4){
|
||||||
|
INFOS("WRONG CALCULATION...residual=" << error);
|
||||||
|
exit(2);
|
||||||
|
} //else INFOS("CALCULATION OK...residual=" << error);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private :
|
||||||
|
|
||||||
|
typename Interface::stl_matrix L_stl;
|
||||||
|
typename Interface::stl_vector X_stl;
|
||||||
|
typename Interface::stl_vector B_stl;
|
||||||
|
typename Interface::stl_vector resu_stl;
|
||||||
|
|
||||||
|
typename Interface::gene_matrix L;
|
||||||
|
typename Interface::gene_vector X;
|
||||||
|
typename Interface::gene_vector B;
|
||||||
|
|
||||||
|
int _size;
|
||||||
|
int _cost;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
15
bench/btl/actions/basic_actions.hh
Normal file
15
bench/btl/actions/basic_actions.hh
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
|
||||||
|
#include "action_axpy.hh"
|
||||||
|
#include "action_axpby.hh"
|
||||||
|
|
||||||
|
#include "action_matrix_vector_product.hh"
|
||||||
|
#include "action_atv_product.hh"
|
||||||
|
|
||||||
|
#include "action_matrix_matrix_product.hh"
|
||||||
|
#include "action_ata_product.hh"
|
||||||
|
#include "action_aat_product.hh"
|
||||||
|
|
||||||
|
#include "action_trisolve.hh"
|
||||||
|
|
||||||
|
// #include "action_lu_solve.hh"
|
||||||
|
|
35
bench/btl/cmake/FindATLAS.cmake
Normal file
35
bench/btl/cmake/FindATLAS.cmake
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# include(FindLibraryWithDebug)
|
||||||
|
|
||||||
|
if (ATLAS_INCLUDES AND ATLAS_LIBRARIES)
|
||||||
|
set(ATLAS_FIND_QUIETLY TRUE)
|
||||||
|
endif (ATLAS_INCLUDES AND ATLAS_LIBRARIES)
|
||||||
|
|
||||||
|
find_path(ATLAS_INCLUDES
|
||||||
|
NAMES
|
||||||
|
cblas.h
|
||||||
|
PATHS
|
||||||
|
$ENV{ATLASDIR}/include
|
||||||
|
${INCLUDE_INSTALL_DIR}
|
||||||
|
)
|
||||||
|
|
||||||
|
find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
|
||||||
|
find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
|
||||||
|
find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
|
||||||
|
find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
|
||||||
|
if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
|
||||||
|
set(ATLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS} ${ATLAS_LAPACK} ${ATLAS_F77BLAS})
|
||||||
|
endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
|
||||||
|
|
||||||
|
include(FindPackageHandleStandardArgs)
|
||||||
|
find_package_handle_standard_args(ATLAS DEFAULT_MSG
|
||||||
|
ATLAS_INCLUDES ATLAS_LIBRARIES)
|
||||||
|
|
||||||
|
mark_as_advanced(ATLAS_INCLUDES ATLAS_LIBRARIES)
|
@ -1,7 +1,7 @@
|
|||||||
|
|
||||||
ADD_CUSTOM_TARGET(copy_scripts)
|
ADD_CUSTOM_TARGET(copy_scripts)
|
||||||
|
|
||||||
SET(script_files go_mean aat.hh ata.hh axpy.hh order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh )
|
SET(script_files go_mean aat.hh ata.hh axpy.hh axpby.hh cholesky.hh trisolve.hh order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh )
|
||||||
|
|
||||||
FOREACH(script_file ${script_files})
|
FOREACH(script_file ${script_files})
|
||||||
ADD_CUSTOM_COMMAND(
|
ADD_CUSTOM_COMMAND(
|
||||||
|
3
bench/btl/data/axpby.hh
Normal file
3
bench/btl/data/axpby.hh
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
set title "{/*1.5 Y = alpha * X + beta * Y}" 0.000000,0.000000
|
||||||
|
set xlabel "vector size" 0.000000,0.000000
|
||||||
|
set xrange [1:1000000]
|
3
bench/btl/data/cholesky.hh
Normal file
3
bench/btl/data/cholesky.hh
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
set title "{/*1.5 Cholesky decomposition}" 0.000000,0.000000
|
||||||
|
set xlabel "matrix size" 0.000000,0.000000
|
||||||
|
set xrange [6:1000]
|
@ -17,11 +17,13 @@ echo '<ul>'\
|
|||||||
'</p>' >> $webpagefilename
|
'</p>' >> $webpagefilename
|
||||||
|
|
||||||
source mk_mean_script.sh axpy $1 11 2500 100000 250000 > $1/axpy.html
|
source mk_mean_script.sh axpy $1 11 2500 100000 250000 > $1/axpy.html
|
||||||
source mk_mean_script.sh matrix_vector $1 11 50 300 500 > $1/matrix_vector.html
|
source mk_mean_script.sh matrix_vector $1 11 50 300 1000 > $1/matrix_vector.html
|
||||||
source mk_mean_script.sh atv $1 11 50 300 500 > $1/atv.html
|
source mk_mean_script.sh atv $1 11 50 300 1000 > $1/atv.html
|
||||||
# source mk_mean_script.sh matrix_matrix $1 11 100 300 500 > $1/matrix_matrix.html
|
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 > $1/matrix_matrix.html
|
||||||
# source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html
|
source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html
|
||||||
# source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html
|
source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html
|
||||||
|
source mk_mean_script.sh trisolve $1 11 100 300 1000 > $1/trisolve.html
|
||||||
|
source mk_mean_script.sh cholesky $1 11 100 300 1000 > $1/cholesky.html
|
||||||
|
|
||||||
## compile the web page ##
|
## compile the web page ##
|
||||||
|
|
||||||
|
@ -23,11 +23,11 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include "bench_parameter.hh"
|
#include "bench_parameter.hh"
|
||||||
|
#include "utils/xy_file.hh"
|
||||||
#include <set>
|
#include <set>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
|
|
||||||
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max);
|
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max);
|
||||||
|
|
||||||
class Lib_Mean{
|
class Lib_Mean{
|
||||||
@ -152,29 +152,6 @@ int main( int argc , char *argv[] )
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
|
|
||||||
|
|
||||||
ifstream input_file (filename.c_str(),ios::in) ;
|
|
||||||
|
|
||||||
if (!input_file){
|
|
||||||
INFOS("!!! Error opening "<<filename);
|
|
||||||
exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
int nb_point=0;
|
|
||||||
int size=0;
|
|
||||||
double mflops=0;
|
|
||||||
|
|
||||||
while (input_file >> size >> mflops ){
|
|
||||||
nb_point++;
|
|
||||||
tab_sizes.push_back(size);
|
|
||||||
tab_mflops.push_back(mflops);
|
|
||||||
}
|
|
||||||
SCRUTE(nb_point);
|
|
||||||
|
|
||||||
input_file.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){
|
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){
|
||||||
|
|
||||||
int size=tab_sizes.size();
|
int size=tab_sizes.size();
|
||||||
|
3
bench/btl/data/trisolve.hh
Normal file
3
bench/btl/data/trisolve.hh
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
set title "{/*1.5 triangular solver (X = inv(L) * X)}" 0.000000,0.000000
|
||||||
|
set xlabel "matrix-vector size" 0.000000,0.000000
|
||||||
|
set xrange [6:1000]
|
@ -25,14 +25,16 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "utilities.h"
|
#include "utilities.h"
|
||||||
#include "size_lin_log.hh"
|
#include "size_lin_log.hh"
|
||||||
#include "dump_file_x_y.hh"
|
#include "xy_file.hh"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "timers/portable_perf_analyzer.hh"
|
#include "timers/portable_perf_analyzer.hh"
|
||||||
// #include "timers/mixed_perf_analyzer.hh"
|
// #include "timers/mixed_perf_analyzer.hh"
|
||||||
// #include "timers/x86_perf_analyzer.hh"
|
// #include "timers/x86_perf_analyzer.hh"
|
||||||
// #include "timers/STL_perf_analyzer.hh"
|
// #include "timers/STL_perf_analyzer.hh"
|
||||||
|
#ifdef HAVE_MKL
|
||||||
|
extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int);
|
||||||
|
#endif
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
template <template<class> class Perf_Analyzer, class Action>
|
template <template<class> class Perf_Analyzer, class Action>
|
||||||
@ -41,8 +43,6 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
|
|||||||
if (BtlConfig::skipAction(Action::name()))
|
if (BtlConfig::skipAction(Action::name()))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
BTL_DISABLE_SSE_EXCEPTIONS();
|
|
||||||
|
|
||||||
string filename="bench_"+Action::name()+".dat";
|
string filename="bench_"+Action::name()+".dat";
|
||||||
|
|
||||||
INFOS("starting " <<filename);
|
INFOS("starting " <<filename);
|
||||||
@ -64,14 +64,71 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
|
|||||||
{
|
{
|
||||||
//INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
|
//INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
|
||||||
std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush;
|
std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush;
|
||||||
|
|
||||||
|
BTL_DISABLE_SSE_EXCEPTIONS();
|
||||||
|
#ifdef HAVE_MKL
|
||||||
|
{
|
||||||
|
float dummy;
|
||||||
|
cblas_saxpy(1,0,&dummy,1,&dummy,1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
|
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
|
||||||
std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
|
std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
|
||||||
}
|
}
|
||||||
// std::cout << "\n";
|
|
||||||
|
if (!BtlConfig::Instance.overwriteResults)
|
||||||
|
{
|
||||||
|
std::vector<int> oldSizes;
|
||||||
|
std::vector<double> oldFlops;
|
||||||
|
if (read_xy_file(filename, oldSizes, oldFlops, true))
|
||||||
|
{
|
||||||
|
// merge the two data
|
||||||
|
std::vector<int> newSizes;
|
||||||
|
std::vector<double> newFlops;
|
||||||
|
int i=0;
|
||||||
|
int j=0;
|
||||||
|
while (i<tab_sizes.size() && j<oldSizes.size())
|
||||||
|
{
|
||||||
|
if (tab_sizes[i] == oldSizes[j])
|
||||||
|
{
|
||||||
|
newSizes.push_back(tab_sizes[i]);
|
||||||
|
newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
|
||||||
|
++i;
|
||||||
|
++j;
|
||||||
|
}
|
||||||
|
else if (tab_sizes[i] < oldSizes[j])
|
||||||
|
{
|
||||||
|
newSizes.push_back(tab_sizes[i]);
|
||||||
|
newFlops.push_back(tab_mflops[i]);
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
newSizes.push_back(oldSizes[j]);
|
||||||
|
newFlops.push_back(oldFlops[j]);
|
||||||
|
++j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (i<tab_sizes.size())
|
||||||
|
{
|
||||||
|
newSizes.push_back(tab_sizes[i]);
|
||||||
|
newFlops.push_back(tab_mflops[i]);
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
while (j<oldSizes.size())
|
||||||
|
{
|
||||||
|
newSizes.push_back(oldSizes[j]);
|
||||||
|
newFlops.push_back(oldFlops[j]);
|
||||||
|
++j;
|
||||||
|
}
|
||||||
|
tab_mflops = newFlops;
|
||||||
|
tab_sizes = newSizes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// dump the result in a file :
|
// dump the result in a file :
|
||||||
|
dump_xy_file(tab_sizes,tab_mflops,filename);
|
||||||
dump_file_x_y(tab_sizes,tab_mflops,filename);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
// minimal time for each measurement
|
// minimal time for each measurement
|
||||||
#define REAL_TYPE float
|
#define REAL_TYPE float
|
||||||
// minimal time for each measurement
|
// minimal time for each measurement
|
||||||
#define MIN_TIME 0.5
|
#define MIN_TIME 1.0
|
||||||
// nb of point on bench curves
|
// nb of point on bench curves
|
||||||
#define NB_POINT 100
|
#define NB_POINT 100
|
||||||
// min vector size for axpy bench
|
// min vector size for axpy bench
|
||||||
|
@ -48,7 +48,7 @@
|
|||||||
: : [aux] "m" (aux)); \
|
: : [aux] "m" (aux)); \
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#define DISABLE_SSE_EXCEPTIONS()
|
#define BTL_DISABLE_SSE_EXCEPTIONS()
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** Enhanced std::string
|
/** Enhanced std::string
|
||||||
@ -163,7 +163,7 @@ class BtlConfig
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
BtlConfig()
|
BtlConfig()
|
||||||
: m_runSingleAction(false)
|
: overwriteResults(false)
|
||||||
{
|
{
|
||||||
char * _config;
|
char * _config;
|
||||||
_config = getenv ("BTL_CONFIG");
|
_config = getenv ("BTL_CONFIG");
|
||||||
@ -179,33 +179,38 @@ public:
|
|||||||
std::cerr << "error processing option: " << config[i] << "\n";
|
std::cerr << "error processing option: " << config[i] << "\n";
|
||||||
exit(2);
|
exit(2);
|
||||||
}
|
}
|
||||||
Instance.m_runSingleAction = true;
|
Instance.m_selectedActionNames = config[i+1].split(":");
|
||||||
Instance.m_singleActionName = config[i+1];
|
|
||||||
|
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
|
else if (config[i].beginsWith("--overwrite"))
|
||||||
|
{
|
||||||
|
Instance.overwriteResults = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BTL_DISABLE_SSE_EXCEPTIONS();
|
BTL_DISABLE_SSE_EXCEPTIONS();
|
||||||
}
|
}
|
||||||
|
|
||||||
BTL_DONT_INLINE static bool skipAction(const std::string& name)
|
BTL_DONT_INLINE static bool skipAction(const std::string& _name)
|
||||||
{
|
{
|
||||||
if (Instance.m_runSingleAction)
|
if (Instance.m_selectedActionNames.empty())
|
||||||
{
|
|
||||||
return !BtlString(name).contains(Instance.m_singleActionName);
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
BtlString name(_name);
|
||||||
|
for (int i=0; i<Instance.m_selectedActionNames.size(); ++i)
|
||||||
|
if (name.contains(Instance.m_selectedActionNames[i]))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected:
|
|
||||||
bool m_runSingleAction;
|
|
||||||
BtlString m_singleActionName;
|
|
||||||
|
|
||||||
static BtlConfig Instance;
|
static BtlConfig Instance;
|
||||||
|
bool overwriteResults;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
std::vector<BtlString> m_selectedActionNames;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define BTL_MAIN \
|
#define BTL_MAIN \
|
||||||
|
@ -24,7 +24,7 @@
|
|||||||
#include "bench_parameter.hh"
|
#include "bench_parameter.hh"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "utilities.h"
|
#include "utilities.h"
|
||||||
#include "dump_file_x_y.hh"
|
#include "xy_file.hh"
|
||||||
#include "static/static_size_generator.hh"
|
#include "static/static_size_generator.hh"
|
||||||
#include "timers/portable_perf_analyzer.hh"
|
#include "timers/portable_perf_analyzer.hh"
|
||||||
#include "timers/mixed_perf_analyzer.hh"
|
#include "timers/mixed_perf_analyzer.hh"
|
||||||
|
@ -38,16 +38,16 @@ public:
|
|||||||
MESSAGE("Portable_Perf_Analyzer Dtor");
|
MESSAGE("Portable_Perf_Analyzer Dtor");
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
BTL_DONT_INLINE double eval_mflops(int size)
|
BTL_DONT_INLINE double eval_mflops(int size)
|
||||||
{
|
{
|
||||||
Action action(size);
|
Action action(size);
|
||||||
|
|
||||||
|
double time_action = 0;
|
||||||
double time_action = time_calculate(action);
|
action.initialize();
|
||||||
|
time_action = time_calculate(action);
|
||||||
while (time_action < MIN_TIME)
|
while (time_action < MIN_TIME)
|
||||||
{
|
{
|
||||||
|
//Action action(size);
|
||||||
_nb_calc *= 2;
|
_nb_calc *= 2;
|
||||||
action.initialize();
|
action.initialize();
|
||||||
time_action = time_calculate(action);
|
time_action = time_calculate(action);
|
||||||
@ -56,8 +56,10 @@ public:
|
|||||||
// optimize
|
// optimize
|
||||||
for (int i=1; i<NB_TRIES; ++i)
|
for (int i=1; i<NB_TRIES; ++i)
|
||||||
{
|
{
|
||||||
action.initialize();
|
Action _action(size);
|
||||||
time_action = std::min(time_action, time_calculate(action));
|
std::cout << " " << _action.nb_op_base()*_nb_calc/(time_action*1e6) << " ";
|
||||||
|
_action.initialize();
|
||||||
|
time_action = std::min(time_action, time_calculate(_action));
|
||||||
}
|
}
|
||||||
|
|
||||||
time_action = time_action / (double(_nb_calc));
|
time_action = time_action / (double(_nb_calc));
|
||||||
@ -66,13 +68,13 @@ public:
|
|||||||
action.initialize();
|
action.initialize();
|
||||||
action.calculate();
|
action.calculate();
|
||||||
action.check_result();
|
action.check_result();
|
||||||
|
|
||||||
return action.nb_op_base()/(time_action*1000000.0);
|
return action.nb_op_base()/(time_action*1000000.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
BTL_DONT_INLINE double time_calculate(Action & action)
|
BTL_DONT_INLINE double time_calculate(Action & action)
|
||||||
{
|
{
|
||||||
// time measurement
|
// time measurement
|
||||||
|
action.calculate();
|
||||||
_chronos.start();
|
_chronos.start();
|
||||||
for (int ii=0;ii<_nb_calc;ii++)
|
for (int ii=0;ii<_nb_calc;ii++)
|
||||||
{
|
{
|
||||||
|
@ -39,6 +39,38 @@
|
|||||||
|
|
||||||
// A timer object measures CPU time.
|
// A timer object measures CPU time.
|
||||||
|
|
||||||
|
// class Portable_Timer
|
||||||
|
// {
|
||||||
|
// public:
|
||||||
|
//
|
||||||
|
// Portable_Timer( void )
|
||||||
|
// {
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// void start() { m_val = getTime(); }
|
||||||
|
//
|
||||||
|
// void stop() { m_val = getTime() - m_val; }
|
||||||
|
//
|
||||||
|
// double elapsed() { return m_val; }
|
||||||
|
//
|
||||||
|
// double user_time() { return elapsed(); }
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// private:
|
||||||
|
//
|
||||||
|
// static inline double getTime(void)
|
||||||
|
// {
|
||||||
|
// struct timeval tv;
|
||||||
|
// struct timezone tz;
|
||||||
|
// gettimeofday(&tv, &tz);
|
||||||
|
// return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// double m_val;
|
||||||
|
//
|
||||||
|
// }; // Portable_Timer
|
||||||
|
|
||||||
class Portable_Timer
|
class Portable_Timer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -46,42 +78,42 @@ class Portable_Timer
|
|||||||
Portable_Timer( void ):_utime_sec_start(-1),
|
Portable_Timer( void ):_utime_sec_start(-1),
|
||||||
_utime_usec_start(-1),
|
_utime_usec_start(-1),
|
||||||
_utime_sec_stop(-1),
|
_utime_sec_stop(-1),
|
||||||
_utime_usec_stop(-1)
|
_utime_usec_stop(-1)/*,
|
||||||
|
m_prev_cs(-1)*/
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void start()
|
void start()
|
||||||
{
|
{
|
||||||
|
|
||||||
int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
|
int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
|
||||||
|
// _start_time = std::clock();
|
||||||
_start_time = std::clock();
|
|
||||||
|
|
||||||
_utime_sec_start = resourcesUsage.ru_utime.tv_sec ;
|
_utime_sec_start = resourcesUsage.ru_utime.tv_sec ;
|
||||||
_utime_usec_start = resourcesUsage.ru_utime.tv_usec ;
|
_utime_usec_start = resourcesUsage.ru_utime.tv_usec ;
|
||||||
|
// m_prev_cs = resourcesUsage.ru_nivcsw;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void stop()
|
void stop()
|
||||||
{
|
{
|
||||||
|
|
||||||
int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
|
int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
|
||||||
|
// _stop_time = std::clock();
|
||||||
_stop_time = std::clock();
|
|
||||||
|
|
||||||
_utime_sec_stop = resourcesUsage.ru_utime.tv_sec ;
|
_utime_sec_stop = resourcesUsage.ru_utime.tv_sec ;
|
||||||
_utime_usec_stop = resourcesUsage.ru_utime.tv_usec ;
|
_utime_usec_stop = resourcesUsage.ru_utime.tv_usec ;
|
||||||
|
|
||||||
|
// m_prev_cs = resourcesUsage.ru_nivcsw - m_prev_cs;
|
||||||
|
// std::cerr << resourcesUsage.ru_nvcsw << " + " << resourcesUsage.ru_nivcsw << "\n";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
double elapsed()
|
double elapsed()
|
||||||
{
|
{
|
||||||
return double(_stop_time - _start_time) / CLOCKS_PER_SEC;
|
return user_time();//double(_stop_time - _start_time) / CLOCKS_PER_SEC;
|
||||||
}
|
}
|
||||||
|
|
||||||
double user_time()
|
double user_time()
|
||||||
{
|
{
|
||||||
|
// std::cout << m_prev_cs << "\n";
|
||||||
long tot_utime_sec=_utime_sec_stop-_utime_sec_start;
|
long tot_utime_sec=_utime_sec_stop-_utime_sec_start;
|
||||||
long tot_utime_usec=_utime_usec_stop-_utime_usec_start;
|
long tot_utime_usec=_utime_usec_stop-_utime_usec_start;
|
||||||
return double(tot_utime_sec)+ double(tot_utime_usec)/double(USEC_IN_SEC) ;
|
return double(tot_utime_sec)+ double(tot_utime_usec)/double(USEC_IN_SEC) ;
|
||||||
@ -98,6 +130,8 @@ private:
|
|||||||
long _utime_sec_stop ;
|
long _utime_sec_stop ;
|
||||||
long _utime_usec_stop ;
|
long _utime_usec_stop ;
|
||||||
|
|
||||||
|
// long m_prev_cs;
|
||||||
|
|
||||||
std::clock_t _start_time;
|
std::clock_t _start_time;
|
||||||
std::clock_t _stop_time;
|
std::clock_t _stop_time;
|
||||||
|
|
||||||
|
@ -17,10 +17,41 @@
|
|||||||
// along with this program; if not, write to the Free Software
|
// along with this program; if not, write to the Free Software
|
||||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
//
|
//
|
||||||
#ifndef DUMP_FILE_X_Y_HH
|
#ifndef XY_FILE_HH
|
||||||
#define DUMP_FILE_X_Y_HH
|
#define XY_FILE_HH
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
bool read_xy_file(const std::string & filename, std::vector<int> & tab_sizes,
|
||||||
|
std::vector<double> & tab_mflops, bool quiet = false)
|
||||||
|
{
|
||||||
|
|
||||||
|
std::ifstream input_file (filename.c_str(),std::ios::in);
|
||||||
|
|
||||||
|
if (!input_file){
|
||||||
|
if (!quiet) {
|
||||||
|
INFOS("!!! Error opening "<<filename);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nb_point=0;
|
||||||
|
int size=0;
|
||||||
|
double mflops=0;
|
||||||
|
|
||||||
|
while (input_file >> size >> mflops ){
|
||||||
|
nb_point++;
|
||||||
|
tab_sizes.push_back(size);
|
||||||
|
tab_mflops.push_back(mflops);
|
||||||
|
}
|
||||||
|
SCRUTE(nb_point);
|
||||||
|
|
||||||
|
input_file.close();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// The Vector class must satisfy the following part of STL vector concept :
|
// The Vector class must satisfy the following part of STL vector concept :
|
||||||
// resize() method
|
// resize() method
|
||||||
@ -30,17 +61,14 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
template<class Vector_A, class Vector_B>
|
template<class Vector_A, class Vector_B>
|
||||||
void dump_file_x_y(const Vector_A & X, const Vector_B & Y, const std::string & filename){
|
void dump_xy_file(const Vector_A & X, const Vector_B & Y, const std::string & filename){
|
||||||
|
|
||||||
ofstream outfile (filename.c_str(),ios::out) ;
|
ofstream outfile (filename.c_str(),ios::out) ;
|
||||||
int size=X.size();
|
int size=X.size();
|
||||||
|
|
||||||
for (int i=0;i<size;i++){
|
for (int i=0;i<size;i++)
|
||||||
|
|
||||||
outfile << X[i] << " " << Y[i] << endl;
|
outfile << X[i] << " " << Y[i] << endl;
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
outfile.close();
|
outfile.close();
|
||||||
}
|
}
|
||||||
|
|
@ -1,13 +1,13 @@
|
|||||||
|
|
||||||
find_package(CBLAS)
|
find_package(ATLAS)
|
||||||
if (CBLAS_FOUND)
|
if (ATLAS_FOUND)
|
||||||
include_directories(${CBLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77)
|
include_directories(${ATLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77)
|
||||||
btl_add_bench(btl_cblas main.cpp)
|
btl_add_bench(btl_atlas main.cpp)
|
||||||
if(BUILD_btl_cblas)
|
if(BUILD_btl_atlas)
|
||||||
target_link_libraries(btl_cblas ${CBLAS_LIBRARIES})
|
target_link_libraries(btl_atlas ${ATLAS_LIBRARIES})
|
||||||
set_target_properties(btl_cblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS")
|
set_target_properties(btl_atlas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1")
|
||||||
endif(BUILD_btl_cblas)
|
endif(BUILD_btl_atlas)
|
||||||
endif (CBLAS_FOUND)
|
endif (ATLAS_FOUND)
|
||||||
|
|
||||||
find_package(MKL)
|
find_package(MKL)
|
||||||
if (MKL_FOUND)
|
if (MKL_FOUND)
|
||||||
@ -15,6 +15,6 @@ if (MKL_FOUND)
|
|||||||
btl_add_bench(btl_mkl main.cpp)
|
btl_add_bench(btl_mkl main.cpp)
|
||||||
if(BUILD_btl_mkl)
|
if(BUILD_btl_mkl)
|
||||||
target_link_libraries(btl_mkl ${MKL_LIBRARIES})
|
target_link_libraries(btl_mkl ${MKL_LIBRARIES})
|
||||||
set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL")
|
set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1")
|
||||||
endif(BUILD_btl_mkl)
|
endif(BUILD_btl_mkl)
|
||||||
endif (MKL_FOUND)
|
endif (MKL_FOUND)
|
||||||
|
@ -25,6 +25,11 @@
|
|||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
#include "cblas.h"
|
#include "cblas.h"
|
||||||
|
#ifdef HAS_LAPACK
|
||||||
|
// Cholesky Factorization
|
||||||
|
void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info);
|
||||||
|
void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAKE_STRING2(S) #S
|
#define MAKE_STRING2(S) #S
|
||||||
@ -53,31 +58,31 @@ public :
|
|||||||
cblas_dgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
|
cblas_dgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
|
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
|
||||||
{
|
|
||||||
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
|
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
|
static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
|
||||||
{
|
|
||||||
cblas_dgemm(CblasColMajor,CblasTrans,CblasTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
|
cblas_dgemm(CblasColMajor,CblasTrans,CblasTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
|
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
|
||||||
{
|
|
||||||
cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
|
cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
|
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
|
||||||
{
|
|
||||||
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
|
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
|
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||||
{
|
|
||||||
cblas_daxpy(N,coef,X,1,Y,1);
|
cblas_daxpy(N,coef,X,1,Y,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||||
|
cblas_dscal(N,b,Y,1);
|
||||||
|
cblas_daxpy(N,a,X,1,Y,1);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
@ -90,41 +95,62 @@ public :
|
|||||||
return MAKE_STRING(CBLASNAME);
|
return MAKE_STRING(CBLASNAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||||
{
|
|
||||||
cblas_sgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
|
cblas_sgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||||
{
|
|
||||||
cblas_sgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
|
cblas_sgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
|
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
|
||||||
{
|
|
||||||
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
|
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
|
static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
|
||||||
{
|
|
||||||
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
|
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
|
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
|
||||||
{
|
|
||||||
cblas_sgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
|
cblas_sgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
|
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
|
||||||
{
|
|
||||||
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
|
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N)
|
static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N){
|
||||||
{
|
|
||||||
cblas_saxpy(N,coef,X,1,Y,1);
|
cblas_saxpy(N,coef,X,1,Y,1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void axpby(float a, const gene_vector & X, float b, gene_vector & Y, int N){
|
||||||
|
cblas_sscal(N,b,Y,1);
|
||||||
|
cblas_saxpy(N,a,X,1,Y,1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef HAS_LAPACK
|
||||||
|
static inline void cholesky(const gene_vector & X, gene_vector & C, int N){
|
||||||
|
cblas_scopy(N*N, X, 1, C, 1);
|
||||||
|
char uplo = 'L';
|
||||||
|
int info = 0;
|
||||||
|
spotrf_(&uplo, &N, C, &N, &info);
|
||||||
|
// float tmp[N];
|
||||||
|
// for (int j = 1; j < N; ++j)
|
||||||
|
// {
|
||||||
|
// int endSize = N-j-1;
|
||||||
|
// if (endSize>0) {
|
||||||
|
//cblas_sgemv(CblasColMajor, CblasNoTrans, N-j-1, j, ATL_rnone, A+j+1, lda, A+j, lda, ATL_rone, Ac+j+1, 1);
|
||||||
|
// cblas_sgemv(CblasColMajor, CblasNoTrans,endSize,j, 1.0, &(C[j+1]),N, &(C[j]),N, 0.0, &(C[j+1+N*j]),1);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
|
||||||
|
cblas_scopy(N, B, 1, X, 1);
|
||||||
|
cblas_strsv(CblasColMajor, CblasLower, CblasNoTrans, CblasNonUnit, N, L, N, X, 1);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,13 +20,11 @@
|
|||||||
#include "utilities.h"
|
#include "utilities.h"
|
||||||
#include "C_BLAS_interface.hh"
|
#include "C_BLAS_interface.hh"
|
||||||
#include "bench.hh"
|
#include "bench.hh"
|
||||||
#include "action_matrix_vector_product.hh"
|
#include "basic_actions.hh"
|
||||||
#include "action_matrix_matrix_product.hh"
|
|
||||||
#include "action_atv_product.hh"
|
#ifdef HAS_LAPACK
|
||||||
#include "action_axpy.hh"
|
#include "action_cholesky.hh"
|
||||||
#include "action_lu_solve.hh"
|
#endif
|
||||||
#include "action_ata_product.hh"
|
|
||||||
#include "action_aat_product.hh"
|
|
||||||
|
|
||||||
BTL_MAIN;
|
BTL_MAIN;
|
||||||
|
|
||||||
@ -34,17 +32,21 @@ int main()
|
|||||||
{
|
{
|
||||||
|
|
||||||
bench<Action_axpy<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
bench<Action_axpy<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
bench<Action_axpby<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
|
||||||
bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
|
|
||||||
bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
|
|
||||||
bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
bench<Action_aat_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_aat_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
|
bench<Action_trisolve<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
|
#ifdef HAS_LAPACK
|
||||||
|
bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
#endif
|
||||||
|
|
||||||
//bench<Action_lu_solve<C_BLAS_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
|
//bench<Action_lu_solve<C_BLAS_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -146,6 +146,22 @@ public :
|
|||||||
Y[i]+=coef*X[i];
|
Y[i]+=coef*X[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||||
|
for (int i=0;i<N;i++)
|
||||||
|
Y[i] = a*X[i] + b*Y[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void trisolve_lower(const gene_matrix & L, const gene_vector & B, gene_vector & X, int N){
|
||||||
|
copy_vector(B,X,N);
|
||||||
|
for(int i=0; i<N; ++i)
|
||||||
|
{
|
||||||
|
X[i] /= L[i][i];
|
||||||
|
real tmp = X[i];
|
||||||
|
for (int j=i+1; j<N; ++j)
|
||||||
|
X[j] -= tmp * L[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static inline real norm_diff(const stl_vector & A, const stl_vector & B)
|
static inline real norm_diff(const stl_vector & A, const stl_vector & B)
|
||||||
{
|
{
|
||||||
int N=A.size();
|
int N=A.size();
|
||||||
|
@ -20,19 +20,14 @@
|
|||||||
#include "utilities.h"
|
#include "utilities.h"
|
||||||
#include "STL_interface.hh"
|
#include "STL_interface.hh"
|
||||||
#include "bench.hh"
|
#include "bench.hh"
|
||||||
#include "action_matrix_vector_product.hh"
|
#include "basic_actions.hh"
|
||||||
#include "action_matrix_matrix_product.hh"
|
|
||||||
#include "action_axpy.hh"
|
|
||||||
#include "action_lu_solve.hh"
|
|
||||||
#include "action_ata_product.hh"
|
|
||||||
#include "action_aat_product.hh"
|
|
||||||
#include "action_atv_product.hh"
|
|
||||||
|
|
||||||
BTL_MAIN;
|
BTL_MAIN;
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
@ -3,13 +3,28 @@ find_package(Eigen2)
|
|||||||
if (EIGEN2_FOUND)
|
if (EIGEN2_FOUND)
|
||||||
|
|
||||||
include_directories(${EIGEN2_INCLUDE_DIR})
|
include_directories(${EIGEN2_INCLUDE_DIR})
|
||||||
btl_add_bench(btl_eigen2 main.cpp)
|
btl_add_bench(btl_eigen2_linear main_linear.cpp)
|
||||||
|
btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp)
|
||||||
|
btl_add_bench(btl_eigen2_matmat main_matmat.cpp)
|
||||||
|
btl_add_bench(btl_eigen2_adv main_adv.cpp)
|
||||||
|
|
||||||
IF(NOT BTL_NOVEC)
|
IF(NOT BTL_NOVEC)
|
||||||
btl_add_bench(btl_eigen2_novec main.cpp)
|
btl_add_bench(btl_eigen2_novec_linear main_linear.cpp)
|
||||||
if(BUILD_btl_eigen2_novec)
|
btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp)
|
||||||
set_target_properties(btl_eigen2_novec PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
|
btl_add_bench(btl_eigen2_novec_matmat main_matmat.cpp)
|
||||||
endif(BUILD_btl_eigen2_novec)
|
btl_add_bench(btl_eigen2_novec_adv main_adv.cpp)
|
||||||
|
if(BUILD_btl_eigen2_novec_linear)
|
||||||
|
set_target_properties(btl_eigen2_novec_linear PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
|
||||||
|
endif(BUILD_btl_eigen2_novec_linear)
|
||||||
|
if(BUILD_btl_eigen2_novec_vecmat)
|
||||||
|
set_target_properties(btl_eigen2_novec_vecmat PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
|
||||||
|
endif(BUILD_btl_eigen2_novec_vecmat)
|
||||||
|
if(BUILD_btl_eigen2_novec_matmat)
|
||||||
|
set_target_properties(btl_eigen2_novec_matmat PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
|
||||||
|
endif(BUILD_btl_eigen2_novec_matmat)
|
||||||
|
if(BUILD_btl_eigen2_novec_adv)
|
||||||
|
set_target_properties(btl_eigen2_novec_adv PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
|
||||||
|
endif(BUILD_btl_eigen2_novec_adv)
|
||||||
ENDIF(NOT BTL_NOVEC)
|
ENDIF(NOT BTL_NOVEC)
|
||||||
|
|
||||||
btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF)
|
btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF)
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
#define EIGEN2_INTERFACE_HH
|
#define EIGEN2_INTERFACE_HH
|
||||||
|
|
||||||
#include <Eigen/Core>
|
#include <Eigen/Core>
|
||||||
|
#include <Eigen/Cholesky>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "btl.hh"
|
#include "btl.hh"
|
||||||
|
|
||||||
@ -107,17 +108,21 @@ public :
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline void matrix_vector_product(const gene_matrix & __restrict__ A, const gene_vector & __restrict__ B, gene_vector & __restrict__ X, int N){
|
static inline void matrix_vector_product(const gene_matrix & __restrict__ A, const gene_vector & __restrict__ B, gene_vector & __restrict__ X, int N){
|
||||||
X = (A*B).lazy();
|
X = (A*B)/*.lazy()*/;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||||
X = (A.transpose()*B).lazy();
|
X = (A.transpose()*B)/*.lazy()*/;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
|
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||||
Y += coef * X;
|
Y += coef * X;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||||
|
Y = a*X + b*Y;
|
||||||
|
}
|
||||||
|
|
||||||
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
||||||
cible = source;
|
cible = source;
|
||||||
}
|
}
|
||||||
@ -126,6 +131,16 @@ public :
|
|||||||
cible = source;
|
cible = source;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int N){
|
||||||
|
X = L.template marked<Lower>().inverseProduct(B);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
|
||||||
|
// C = X;
|
||||||
|
// Cholesky<gene_matrix>::computeInPlace(C);
|
||||||
|
C = X.cholesky().matrixL();
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
34
bench/btl/libs/eigen2/main_adv.cpp
Normal file
34
bench/btl/libs/eigen2/main_adv.cpp
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
//=====================================================
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
|
//=====================================================
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License
|
||||||
|
// as published by the Free Software Foundation; either version 2
|
||||||
|
// of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
//
|
||||||
|
#include "utilities.h"
|
||||||
|
#include "eigen2_interface.hh"
|
||||||
|
#include "bench.hh"
|
||||||
|
#include "action_trisolve.hh"
|
||||||
|
#include "action_cholesky.hh"
|
||||||
|
|
||||||
|
BTL_MAIN;
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
34
bench/btl/libs/eigen2/main_linear.cpp
Normal file
34
bench/btl/libs/eigen2/main_linear.cpp
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
//=====================================================
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
|
//=====================================================
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License
|
||||||
|
// as published by the Free Software Foundation; either version 2
|
||||||
|
// of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
//
|
||||||
|
#include "utilities.h"
|
||||||
|
#include "eigen2_interface.hh"
|
||||||
|
#include "bench.hh"
|
||||||
|
#include "basic_actions.hh"
|
||||||
|
|
||||||
|
BTL_MAIN;
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
|
||||||
|
bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
bench<Action_axpby<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
34
bench/btl/libs/eigen2/main_matmat.cpp
Normal file
34
bench/btl/libs/eigen2/main_matmat.cpp
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
//=====================================================
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
|
//=====================================================
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License
|
||||||
|
// as published by the Free Software Foundation; either version 2
|
||||||
|
// of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
//
|
||||||
|
#include "utilities.h"
|
||||||
|
#include "eigen2_interface.hh"
|
||||||
|
#include "bench.hh"
|
||||||
|
#include "basic_actions.hh"
|
||||||
|
|
||||||
|
BTL_MAIN;
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -18,29 +18,15 @@
|
|||||||
#include "utilities.h"
|
#include "utilities.h"
|
||||||
#include "eigen2_interface.hh"
|
#include "eigen2_interface.hh"
|
||||||
#include "bench.hh"
|
#include "bench.hh"
|
||||||
#include "action_matrix_vector_product.hh"
|
#include "basic_actions.hh"
|
||||||
#include "action_matrix_matrix_product.hh"
|
|
||||||
#include "action_axpy.hh"
|
|
||||||
#include "action_lu_solve.hh"
|
|
||||||
#include "action_ata_product.hh"
|
|
||||||
#include "action_aat_product.hh"
|
|
||||||
#include "action_atv_product.hh"
|
|
||||||
|
|
||||||
BTL_MAIN;
|
BTL_MAIN;
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
|
|
||||||
bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
|
|
||||||
bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
|
||||||
// bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
|
||||||
// bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
|
||||||
// bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
|
||||||
bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
|
|
||||||
//bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -106,6 +106,10 @@ public :
|
|||||||
gmm::add(gmm::scaled(X,coef), Y);
|
gmm::add(gmm::scaled(X,coef), Y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||||
|
gmm::add(gmm::scaled(X,a), gmm::scaled(Y,b), Y);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
||||||
gmm::copy(source,cible);
|
gmm::copy(source,cible);
|
||||||
}
|
}
|
||||||
@ -114,6 +118,12 @@ public :
|
|||||||
gmm::copy(source,cible);
|
gmm::copy(source,cible);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
|
||||||
|
gmm::copy(B,X);
|
||||||
|
gmm::lower_tri_solve(L, X, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -18,13 +18,7 @@
|
|||||||
#include "utilities.h"
|
#include "utilities.h"
|
||||||
#include "gmm_interface.hh"
|
#include "gmm_interface.hh"
|
||||||
#include "bench.hh"
|
#include "bench.hh"
|
||||||
#include "action_matrix_vector_product.hh"
|
#include "basic_actions.hh"
|
||||||
#include "action_matrix_matrix_product.hh"
|
|
||||||
#include "action_axpy.hh"
|
|
||||||
#include "action_lu_solve.hh"
|
|
||||||
#include "action_ata_product.hh"
|
|
||||||
#include "action_aat_product.hh"
|
|
||||||
#include "action_atv_product.hh"
|
|
||||||
|
|
||||||
BTL_MAIN;
|
BTL_MAIN;
|
||||||
|
|
||||||
@ -32,12 +26,16 @@ int main()
|
|||||||
{
|
{
|
||||||
|
|
||||||
bench<Action_axpy<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
bench<Action_axpy<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
bench<Action_axpby<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
|
||||||
bench<Action_matrix_vector_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_matrix_vector_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_atv_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_atv_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
|
|
||||||
bench<Action_matrix_matrix_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_matrix_matrix_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
bench<Action_ata_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_ata_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
bench<Action_aat_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_aat_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
|
bench<Action_trisolve<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
//bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
|
//bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -72,92 +72,148 @@ static inline void matrix_vector_product(const gene_matrix & A, const gene_vecto
|
|||||||
{
|
{
|
||||||
asm("#begin matrix_vector_product");
|
asm("#begin matrix_vector_product");
|
||||||
int AN = (N/PacketSize)*PacketSize;
|
int AN = (N/PacketSize)*PacketSize;
|
||||||
int ANP = (AN/(4*PacketSize))*4*PacketSize;
|
int ANP = (AN/(2*PacketSize))*2*PacketSize;
|
||||||
int bound = (N/4)*4;
|
int bound = (N/4)*4;
|
||||||
for (int i=0;i<N;i++)
|
for (int i=0;i<N;i++)
|
||||||
X[i] = 0;
|
X[i] = 0;
|
||||||
|
|
||||||
for (int i=0;i<bound;i+=4)
|
for (int i=0;i<bound;i+=4)
|
||||||
{
|
{
|
||||||
real tmp0 = B[i];
|
register real* __restrict__ A0 = A + i*N;
|
||||||
Packet ptmp0 = ei_pset1(tmp0);
|
register real* __restrict__ A1 = A + (i+1)*N;
|
||||||
real tmp1 = B[i+1];
|
register real* __restrict__ A2 = A + (i+2)*N;
|
||||||
Packet ptmp1 = ei_pset1(tmp1);
|
register real* __restrict__ A3 = A + (i+3)*N;
|
||||||
real tmp2 = B[i+2];
|
|
||||||
Packet ptmp2 = ei_pset1(tmp2);
|
Packet ptmp0 = ei_pset1(B[i]);
|
||||||
real tmp3 = B[i+3];
|
Packet ptmp1 = ei_pset1(B[i+1]);
|
||||||
Packet ptmp3 = ei_pset1(tmp3);
|
Packet ptmp2 = ei_pset1(B[i+2]);
|
||||||
int iN0 = i*N;
|
Packet ptmp3 = ei_pset1(B[i+3]);
|
||||||
int iN1 = (i+1)*N;
|
// register Packet ptmp0, ptmp1, ptmp2, ptmp3;
|
||||||
int iN2 = (i+2)*N;
|
// asm(
|
||||||
int iN3 = (i+3)*N;
|
//
|
||||||
|
// "movss (%[B],%[j],4), %[ptmp0] \n\t"
|
||||||
|
// "shufps $0,%[ptmp0],%[ptmp0] \n\t"
|
||||||
|
// "movss 4(%[B],%[j],4), %[ptmp1] \n\t"
|
||||||
|
// "shufps $0,%[ptmp1],%[ptmp1] \n\t"
|
||||||
|
// "movss 8(%[B],%[j],4), %[ptmp2] \n\t"
|
||||||
|
// "shufps $0,%[ptmp2],%[ptmp2] \n\t"
|
||||||
|
// "movss 12(%[B],%[j],4), %[ptmp3] \n\t"
|
||||||
|
// "shufps $0,%[ptmp3],%[ptmp3] \n\t"
|
||||||
|
// : [ptmp0] "=x" (ptmp0),
|
||||||
|
// [ptmp1] "=x" (ptmp1),
|
||||||
|
// [ptmp2] "=x" (ptmp2),
|
||||||
|
// [ptmp3] "=x" (ptmp3)
|
||||||
|
// : [B] "r" (B),
|
||||||
|
// [j] "r" (size_t(i))
|
||||||
|
// : );
|
||||||
|
|
||||||
if (AN>0)
|
if (AN>0)
|
||||||
{
|
{
|
||||||
// int aligned0 = (iN0 % PacketSize);
|
// for (size_t j = 0;j<ANP;j+=8)
|
||||||
int aligned1 = (iN1 % PacketSize);
|
// {
|
||||||
|
// asm(
|
||||||
|
//
|
||||||
|
// "movaps (%[A0],%[j],4), %%xmm8 \n\t"
|
||||||
|
// "movaps 16(%[A0],%[j],4), %%xmm12 \n\t"
|
||||||
|
// "movups (%[A3],%[j],4), %%xmm11 \n\t"
|
||||||
|
// "movups 16(%[A3],%[j],4), %%xmm15 \n\t"
|
||||||
|
// "movups (%[A2],%[j],4), %%xmm10 \n\t"
|
||||||
|
// "movups 16(%[A2],%[j],4), %%xmm14 \n\t"
|
||||||
|
// "movups (%[A1],%[j],4), %%xmm9 \n\t"
|
||||||
|
// "movups 16(%[A1],%[j],4), %%xmm13 \n\t"
|
||||||
|
//
|
||||||
|
// "mulps %[ptmp0], %%xmm8 \n\t"
|
||||||
|
// "addps (%[res0],%[j],4), %%xmm8 \n\t"
|
||||||
|
// "mulps %[ptmp3], %%xmm11 \n\t"
|
||||||
|
// "addps %%xmm11, %%xmm8 \n\t"
|
||||||
|
// "mulps %[ptmp2], %%xmm10 \n\t"
|
||||||
|
// "addps %%xmm10, %%xmm8 \n\t"
|
||||||
|
// "mulps %[ptmp1], %%xmm9 \n\t"
|
||||||
|
// "addps %%xmm9, %%xmm8 \n\t"
|
||||||
|
// "movaps %%xmm8, (%[res0],%[j],4) \n\t"
|
||||||
|
//
|
||||||
|
// "mulps %[ptmp0], %%xmm12 \n\t"
|
||||||
|
// "addps 16(%[res0],%[j],4), %%xmm12 \n\t"
|
||||||
|
// "mulps %[ptmp3], %%xmm15 \n\t"
|
||||||
|
// "addps %%xmm15, %%xmm12 \n\t"
|
||||||
|
// "mulps %[ptmp2], %%xmm14 \n\t"
|
||||||
|
// "addps %%xmm14, %%xmm12 \n\t"
|
||||||
|
// "mulps %[ptmp1], %%xmm13 \n\t"
|
||||||
|
// "addps %%xmm13, %%xmm12 \n\t"
|
||||||
|
// "movaps %%xmm12, 16(%[res0],%[j],4) \n\t"
|
||||||
|
// :
|
||||||
|
// : [res0] "r" (X), [j] "r" (j),[A0] "r" (A0),
|
||||||
|
// [A1] "r" (A1),
|
||||||
|
// [A2] "r" (A2),
|
||||||
|
// [A3] "r" (A3),
|
||||||
|
// [ptmp0] "x" (ptmp0),
|
||||||
|
// [ptmp1] "x" (ptmp1),
|
||||||
|
// [ptmp2] "x" (ptmp2),
|
||||||
|
// [ptmp3] "x" (ptmp3)
|
||||||
|
// : "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "%r14");
|
||||||
|
// }
|
||||||
|
register Packet A00;
|
||||||
|
register Packet A01;
|
||||||
|
register Packet A02;
|
||||||
|
register Packet A03;
|
||||||
|
register Packet A10;
|
||||||
|
register Packet A11;
|
||||||
|
register Packet A12;
|
||||||
|
register Packet A13;
|
||||||
|
for (int j = 0;j<ANP;j+=2*PacketSize)
|
||||||
|
{
|
||||||
|
// A00 = ei_pload(&A0[j]);
|
||||||
|
// A01 = ei_ploadu(&A1[j]);
|
||||||
|
// A02 = ei_ploadu(&A2[j]);
|
||||||
|
// A03 = ei_ploadu(&A3[j]);
|
||||||
|
// A10 = ei_pload(&A0[j+PacketSize]);
|
||||||
|
// A11 = ei_ploadu(&A1[j+PacketSize]);
|
||||||
|
// A12 = ei_ploadu(&A2[j+PacketSize]);
|
||||||
|
// A13 = ei_ploadu(&A3[j+PacketSize]);
|
||||||
|
//
|
||||||
|
// A00 = ei_pmul(ptmp0, A00);
|
||||||
|
// A01 = ei_pmul(ptmp1, A01);
|
||||||
|
// A02 = ei_pmul(ptmp2, A02);
|
||||||
|
// A03 = ei_pmul(ptmp3, A03);
|
||||||
|
// A10 = ei_pmul(ptmp0, A10);
|
||||||
|
// A11 = ei_pmul(ptmp1, A11);
|
||||||
|
// A12 = ei_pmul(ptmp2, A12);
|
||||||
|
// A13 = ei_pmul(ptmp3, A13);
|
||||||
|
//
|
||||||
|
// A00 = ei_padd(A00,A01);
|
||||||
|
// A02 = ei_padd(A02,A03);
|
||||||
|
// A00 = ei_padd(A00,ei_pload(&X[j]));
|
||||||
|
// A00 = ei_padd(A00,A02);
|
||||||
|
// ei_pstore(&X[j],A00);
|
||||||
|
//
|
||||||
|
// A10 = ei_padd(A10,A11);
|
||||||
|
// A12 = ei_padd(A12,A13);
|
||||||
|
// A10 = ei_padd(A10,ei_pload(&X[j+PacketSize]));
|
||||||
|
// A10 = ei_padd(A10,A12);
|
||||||
|
// ei_pstore(&X[j+PacketSize],A10);
|
||||||
|
|
||||||
if (aligned1==0)
|
|
||||||
{
|
|
||||||
for (int j = 0;j<AN;j+=PacketSize)
|
|
||||||
{
|
|
||||||
ei_pstore(&X[j],
|
ei_pstore(&X[j],
|
||||||
ei_padd(ei_pload(&X[j]),
|
ei_padd(ei_pload(&X[j]),
|
||||||
ei_padd(
|
ei_padd(
|
||||||
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_pload(&A[j+iN1]))),
|
ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j])),ei_pmul(ptmp1,ei_ploadu(&A1[j]))),
|
||||||
ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_pload(&A[j+iN3]))) )));
|
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j])),ei_pmul(ptmp3,ei_ploadu(&A3[j]))) )));
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (aligned1==2)
|
|
||||||
{
|
|
||||||
for (int j = 0;j<AN;j+=PacketSize)
|
|
||||||
{
|
|
||||||
ei_pstore(&X[j],
|
|
||||||
ei_padd(ei_pload(&X[j]),
|
|
||||||
ei_padd(
|
|
||||||
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
|
|
||||||
ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int j = 0;j<ANP;j+=4*PacketSize)
|
|
||||||
{
|
|
||||||
ei_pstore(&X[j],
|
|
||||||
ei_padd(ei_pload(&X[j]),
|
|
||||||
ei_padd(
|
|
||||||
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
|
|
||||||
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
|
|
||||||
|
|
||||||
ei_pstore(&X[j+PacketSize],
|
ei_pstore(&X[j+PacketSize],
|
||||||
ei_padd(ei_pload(&X[j+PacketSize]),
|
ei_padd(ei_pload(&X[j+PacketSize]),
|
||||||
ei_padd(
|
ei_padd(
|
||||||
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+PacketSize+iN1]))),
|
ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j+PacketSize])),ei_pmul(ptmp1,ei_ploadu(&A1[j+PacketSize]))),
|
||||||
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+PacketSize+iN3]))) )));
|
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j+PacketSize])),ei_pmul(ptmp3,ei_ploadu(&A3[j+PacketSize]))) )));
|
||||||
|
|
||||||
ei_pstore(&X[j+2*PacketSize],
|
|
||||||
ei_padd(ei_pload(&X[j+2*PacketSize]),
|
|
||||||
ei_padd(
|
|
||||||
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+2*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+2*PacketSize+iN1]))),
|
|
||||||
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+2*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+2*PacketSize+iN3]))) )));
|
|
||||||
|
|
||||||
ei_pstore(&X[j+3*PacketSize],
|
|
||||||
ei_padd(ei_pload(&X[j+3*PacketSize]),
|
|
||||||
ei_padd(
|
|
||||||
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+3*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+3*PacketSize+iN1]))),
|
|
||||||
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+3*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+3*PacketSize+iN3]))) )));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
for (int j = ANP;j<AN;j+=PacketSize)
|
for (int j = ANP;j<AN;j+=PacketSize)
|
||||||
ei_pstore(&X[j],
|
ei_pstore(&X[j],
|
||||||
ei_padd(ei_pload(&X[j]),
|
ei_padd(ei_pload(&X[j]),
|
||||||
ei_padd(
|
ei_padd(
|
||||||
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
|
ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j])),ei_pmul(ptmp1,ei_ploadu(&A1[j]))),
|
||||||
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
|
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j])),ei_pmul(ptmp3,ei_ploadu(&A3[j]))) )));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// process remaining scalars
|
// process remaining scalars
|
||||||
for (int j=AN;j<N;j++)
|
for (int j=AN;j<N;j++)
|
||||||
X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1];
|
X[j] += ei_pfirst(ptmp0) * A0[j] + ei_pfirst(ptmp1) * A1[j] + ei_pfirst(ptmp2) * A2[j] + ei_pfirst(ptmp3) * A3[j];
|
||||||
}
|
}
|
||||||
for (int i=bound;i<N;i++)
|
for (int i=bound;i<N;i++)
|
||||||
{
|
{
|
||||||
@ -181,6 +237,119 @@ static inline void matrix_vector_product(const gene_matrix & A, const gene_vecto
|
|||||||
asm("#end matrix_vector_product");
|
asm("#end matrix_vector_product");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||||
|
// {
|
||||||
|
// asm("#begin matrix_vector_product");
|
||||||
|
// int AN = (N/PacketSize)*PacketSize;
|
||||||
|
// int ANP = (AN/(2*PacketSize))*2*PacketSize;
|
||||||
|
// int bound = (N/4)*4;
|
||||||
|
// for (int i=0;i<N;i++)
|
||||||
|
// X[i] = 0;
|
||||||
|
//
|
||||||
|
// for (int i=0;i<bound;i+=4)
|
||||||
|
// {
|
||||||
|
// real tmp0 = B[i];
|
||||||
|
// Packet ptmp0 = ei_pset1(tmp0);
|
||||||
|
// real tmp1 = B[i+1];
|
||||||
|
// Packet ptmp1 = ei_pset1(tmp1);
|
||||||
|
// real tmp2 = B[i+2];
|
||||||
|
// Packet ptmp2 = ei_pset1(tmp2);
|
||||||
|
// real tmp3 = B[i+3];
|
||||||
|
// Packet ptmp3 = ei_pset1(tmp3);
|
||||||
|
// int iN0 = i*N;
|
||||||
|
// int iN1 = (i+1)*N;
|
||||||
|
// int iN2 = (i+2)*N;
|
||||||
|
// int iN3 = (i+3)*N;
|
||||||
|
// if (AN>0)
|
||||||
|
// {
|
||||||
|
// // int aligned0 = (iN0 % PacketSize);
|
||||||
|
// int aligned1 = (iN1 % PacketSize);
|
||||||
|
//
|
||||||
|
// if (aligned1==0)
|
||||||
|
// {
|
||||||
|
// for (int j = 0;j<AN;j+=PacketSize)
|
||||||
|
// {
|
||||||
|
// ei_pstore(&X[j],
|
||||||
|
// ei_padd(ei_pload(&X[j]),
|
||||||
|
// ei_padd(
|
||||||
|
// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_pload(&A[j+iN1]))),
|
||||||
|
// ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_pload(&A[j+iN3]))) )));
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else if (aligned1==2)
|
||||||
|
// {
|
||||||
|
// for (int j = 0;j<AN;j+=PacketSize)
|
||||||
|
// {
|
||||||
|
// ei_pstore(&X[j],
|
||||||
|
// ei_padd(ei_pload(&X[j]),
|
||||||
|
// ei_padd(
|
||||||
|
// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
|
||||||
|
// ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else
|
||||||
|
// {
|
||||||
|
// for (int j = 0;j<ANP;j+=2*PacketSize)
|
||||||
|
// {
|
||||||
|
// ei_pstore(&X[j],
|
||||||
|
// ei_padd(ei_pload(&X[j]),
|
||||||
|
// ei_padd(
|
||||||
|
// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
|
||||||
|
// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
|
||||||
|
//
|
||||||
|
// ei_pstore(&X[j+PacketSize],
|
||||||
|
// ei_padd(ei_pload(&X[j+PacketSize]),
|
||||||
|
// ei_padd(
|
||||||
|
// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+PacketSize+iN1]))),
|
||||||
|
// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+PacketSize+iN3]))) )));
|
||||||
|
//
|
||||||
|
// // ei_pstore(&X[j+2*PacketSize],
|
||||||
|
// // ei_padd(ei_pload(&X[j+2*PacketSize]),
|
||||||
|
// // ei_padd(
|
||||||
|
// // ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+2*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+2*PacketSize+iN1]))),
|
||||||
|
// // ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+2*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+2*PacketSize+iN3]))) )));
|
||||||
|
// //
|
||||||
|
// // ei_pstore(&X[j+3*PacketSize],
|
||||||
|
// // ei_padd(ei_pload(&X[j+3*PacketSize]),
|
||||||
|
// // ei_padd(
|
||||||
|
// // ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+3*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+3*PacketSize+iN1]))),
|
||||||
|
// // ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+3*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+3*PacketSize+iN3]))) )));
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
// for (int j = ANP;j<AN;j+=PacketSize)
|
||||||
|
// ei_pstore(&X[j],
|
||||||
|
// ei_padd(ei_pload(&X[j]),
|
||||||
|
// ei_padd(
|
||||||
|
// ei_padd(ei_pmul(ptmp0,ei_ploadu(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
|
||||||
|
// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// // process remaining scalars
|
||||||
|
// for (int j=AN;j<N;j++)
|
||||||
|
// X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1] + tmp2 * A[j+iN2] + tmp3 * A[j+iN3];
|
||||||
|
// }
|
||||||
|
// for (int i=bound;i<N;i++)
|
||||||
|
// {
|
||||||
|
// real tmp0 = B[i];
|
||||||
|
// Packet ptmp0 = ei_pset1(tmp0);
|
||||||
|
// int iN0 = i*N;
|
||||||
|
// if (AN>0)
|
||||||
|
// {
|
||||||
|
// bool aligned0 = (iN0 % PacketSize) == 0;
|
||||||
|
// if (aligned0)
|
||||||
|
// for (int j = 0;j<AN;j+=PacketSize)
|
||||||
|
// ei_pstore(&X[j], ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pload(&X[j])));
|
||||||
|
// else
|
||||||
|
// for (int j = 0;j<AN;j+=PacketSize)
|
||||||
|
// ei_pstore(&X[j], ei_padd(ei_pmul(ptmp0,ei_ploadu(&A[j+iN0])),ei_pload(&X[j])));
|
||||||
|
// }
|
||||||
|
// // process remaining scalars
|
||||||
|
// for (int j=AN;j<N;j++)
|
||||||
|
// X[j] += tmp0 * A[j+iN0];
|
||||||
|
// }
|
||||||
|
// asm("#end matrix_vector_product");
|
||||||
|
// }
|
||||||
|
|
||||||
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||||
// {
|
// {
|
||||||
// asm("#begin matrix_vector_product");
|
// asm("#begin matrix_vector_product");
|
||||||
|
@ -18,13 +18,8 @@
|
|||||||
#include "utilities.h"
|
#include "utilities.h"
|
||||||
#include "mtl4_interface.hh"
|
#include "mtl4_interface.hh"
|
||||||
#include "bench.hh"
|
#include "bench.hh"
|
||||||
#include "action_matrix_vector_product.hh"
|
#include "basic_actions.hh"
|
||||||
#include "action_matrix_matrix_product.hh"
|
#include "action_cholesky.hh"
|
||||||
#include "action_axpy.hh"
|
|
||||||
#include "action_lu_solve.hh"
|
|
||||||
#include "action_ata_product.hh"
|
|
||||||
#include "action_aat_product.hh"
|
|
||||||
#include "action_atv_product.hh"
|
|
||||||
|
|
||||||
BTL_MAIN;
|
BTL_MAIN;
|
||||||
|
|
||||||
@ -32,12 +27,17 @@ int main()
|
|||||||
{
|
{
|
||||||
|
|
||||||
bench<Action_axpy<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
bench<Action_axpy<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
bench<Action_axpby<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
|
||||||
bench<Action_matrix_vector_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_matrix_vector_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_atv_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_atv_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_matrix_matrix_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_matrix_matrix_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
// bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
// bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
// bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
// bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
|
bench<Action_trisolve<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
bench<Action_cholesky<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
#define MTL4_INTERFACE_HH
|
#define MTL4_INTERFACE_HH
|
||||||
|
|
||||||
#include <boost/numeric/mtl/mtl.hpp>
|
#include <boost/numeric/mtl/mtl.hpp>
|
||||||
|
#include <boost/numeric/mtl/operation/cholesky.hpp>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
using namespace mtl;
|
using namespace mtl;
|
||||||
@ -81,6 +82,9 @@ public :
|
|||||||
|
|
||||||
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||||
X = (A*B);
|
X = (A*B);
|
||||||
|
// morton_dense<double, doppled_64_row_mask> C(N,N);
|
||||||
|
// C = B;
|
||||||
|
// X = (A*C);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||||
@ -88,11 +92,11 @@ public :
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
|
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||||
// X = (trans(A)*A);
|
X = (trans(A)*A);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||||
// X = (A*trans(A));
|
X = (A*trans(A));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||||
@ -107,6 +111,19 @@ public :
|
|||||||
Y += coef * X;
|
Y += coef * X;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||||
|
Y = a*X + b*Y;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
|
||||||
|
C = X;
|
||||||
|
recursive_cholesky(C);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
|
||||||
|
X = lower_trisolve(L, B);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
||||||
cible = source;
|
cible = source;
|
||||||
}
|
}
|
||||||
|
@ -20,18 +20,15 @@
|
|||||||
#include "utilities.h"
|
#include "utilities.h"
|
||||||
#include "ublas_interface.hh"
|
#include "ublas_interface.hh"
|
||||||
#include "bench.hh"
|
#include "bench.hh"
|
||||||
#include "action_matrix_vector_product.hh"
|
#include "basic_actions.hh"
|
||||||
#include "action_matrix_matrix_product.hh"
|
#include "basic_actions.hh"
|
||||||
#include "action_axpy.hh"
|
|
||||||
#include "action_ata_product.hh"
|
|
||||||
#include "action_aat_product.hh"
|
|
||||||
#include "action_atv_product.hh"
|
|
||||||
|
|
||||||
BTL_MAIN;
|
BTL_MAIN;
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
bench<Action_axpy<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
bench<Action_axpy<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
bench<Action_axpby<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
|
||||||
bench<Action_matrix_vector_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_matrix_vector_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_atv_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_atv_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
@ -40,6 +37,8 @@ int main()
|
|||||||
bench<Action_ata_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_ata_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
bench<Action_aat_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_aat_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
|
bench<Action_trisolve<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,7 +23,9 @@
|
|||||||
#include <boost/numeric/ublas/vector.hpp>
|
#include <boost/numeric/ublas/vector.hpp>
|
||||||
#include <boost/numeric/ublas/matrix.hpp>
|
#include <boost/numeric/ublas/matrix.hpp>
|
||||||
#include <boost/numeric/ublas/io.hpp>
|
#include <boost/numeric/ublas/io.hpp>
|
||||||
|
#include <boost/numeric/ublas/triangular.hpp>
|
||||||
|
|
||||||
|
using namespace boost::numeric;
|
||||||
|
|
||||||
template <class real>
|
template <class real>
|
||||||
class ublas_interface{
|
class ublas_interface{
|
||||||
@ -116,6 +118,10 @@ public :
|
|||||||
Y.plus_assign(coef*X);
|
Y.plus_assign(coef*X);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||||
|
Y = a*X + b*Y;
|
||||||
|
}
|
||||||
|
|
||||||
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
|
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
|
||||||
// X = prod(trans(A),A);
|
// X = prod(trans(A),A);
|
||||||
X.assign(prod(trans(A),A));
|
X.assign(prod(trans(A),A));
|
||||||
@ -126,6 +132,10 @@ public :
|
|||||||
X.assign(prod(A,trans(A)));
|
X.assign(prod(A,trans(A)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
|
||||||
|
X = solve(L, B, ublas::lower_tag ());
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -60,8 +60,8 @@ int main(int argc, char *argv[])
|
|||||||
BenchTimer timer;
|
BenchTimer timer;
|
||||||
#if 1
|
#if 1
|
||||||
EigenSparseTriMatrix sm1(rows,cols);
|
EigenSparseTriMatrix sm1(rows,cols);
|
||||||
VectorXf b = VectorXf::random(cols);
|
VectorXf b = VectorXf::Random(cols);
|
||||||
VectorXf x = VectorXf::random(cols);
|
VectorXf x = VectorXf::Random(cols);
|
||||||
|
|
||||||
bool densedone = false;
|
bool densedone = false;
|
||||||
|
|
||||||
@ -171,9 +171,9 @@ int main(int argc, char *argv[])
|
|||||||
{
|
{
|
||||||
timer.reset();
|
timer.reset();
|
||||||
for (int _j=0; _j<10; ++_j) {
|
for (int _j=0; _j<10; ++_j) {
|
||||||
Matrix4f m = Matrix4f::random();
|
Matrix4f m = Matrix4f::Random();
|
||||||
Vector4f b = Vector4f::random();
|
Vector4f b = Vector4f::Random();
|
||||||
Vector4f x = Vector4f::random();
|
Vector4f x = Vector4f::Random();
|
||||||
timer.start();
|
timer.start();
|
||||||
for (int _k=0; _k<1000000; ++_k) {
|
for (int _k=0; _k<1000000; ++_k) {
|
||||||
b = m.inverseProduct(b);
|
b = m.inverseProduct(b);
|
||||||
@ -186,9 +186,9 @@ int main(int argc, char *argv[])
|
|||||||
{
|
{
|
||||||
timer.reset();
|
timer.reset();
|
||||||
for (int _j=0; _j<10; ++_j) {
|
for (int _j=0; _j<10; ++_j) {
|
||||||
Matrix4f m = Matrix4f::random();
|
Matrix4f m = Matrix4f::Random();
|
||||||
Vector4f b = Vector4f::random();
|
Vector4f b = Vector4f::Random();
|
||||||
Vector4f x = Vector4f::random();
|
Vector4f x = Vector4f::Random();
|
||||||
timer.start();
|
timer.start();
|
||||||
for (int _k=0; _k<1000000; ++_k) {
|
for (int _k=0; _k<1000000; ++_k) {
|
||||||
m.inverseProductInPlace(x);
|
m.inverseProductInPlace(x);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user