mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 19:29:02 +08:00
various update of of BTL
This commit is contained in:
parent
3288e9e168
commit
45136ac3b6
@ -43,10 +43,10 @@ Finally, if bench results already exist (the bench*.dat files) then they merges
|
|||||||
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
|
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
|
||||||
|
|
||||||
4 : Analyze the result. different data files (.dat) are produced in each libs directories.
|
4 : Analyze the result. different data files (.dat) are produced in each libs directories.
|
||||||
If gnuplot is available, choose a directory name in the data directory to store the results and type
|
If gnuplot is available, choose a directory name in the data directory to store the results and type:
|
||||||
cd data
|
$ cd data
|
||||||
mkdir my_directory
|
$ mkdir my_directory
|
||||||
cp ../libs/*/*.dat my_directory
|
$ cp ../libs/*/*.dat my_directory
|
||||||
Build the data utilities in this (data) directory
|
Build the data utilities in this (data) directory
|
||||||
make
|
make
|
||||||
Then you can look the raw data,
|
Then you can look the raw data,
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
#include "action_trisolve.hh"
|
#include "action_trisolve.hh"
|
||||||
|
|
||||||
#include "action_symv.hh"
|
#include "action_symv.hh"
|
||||||
#include "action_symm.hh"
|
// #include "action_symm.hh"
|
||||||
#include "action_syr2.hh"
|
#include "action_syr2.hh"
|
||||||
|
|
||||||
// #include "action_lu_solve.hh"
|
// #include "action_lu_solve.hh"
|
||||||
|
@ -15,23 +15,25 @@ find_path(ATLAS_INCLUDES
|
|||||||
find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
|
||||||
# find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
# find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
|
||||||
# find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
# find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
|
||||||
# find_file(ATLAS_LAPACK liblapack.so.3 PATHS /usr/lib/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
if(NOT ATLAS_LAPACK)
|
||||||
# find_library(ATLAS_LAPACK lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
find_file(ATLAS_LAPACK liblapack.so.3 PATHS /usr/lib/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
find_library(ATLAS_LAPACK lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
endif(NOT ATLAS_LAPACK)
|
||||||
|
|
||||||
# find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
# find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
|
||||||
|
|
||||||
# if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
|
if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
|
||||||
set(ATLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_LAPACK}
|
|
||||||
# ${ATLAS_CBLAS} ${ATLAS_LAPACK} ${ATLAS_F77BLAS}
|
set(ATLAS_LIBRARIES ${ATLAS_LAPACK} ${ATLAS_CBLAS} ${ATLAS_F77BLAS} ${ATLAS_LIB})
|
||||||
)
|
|
||||||
# endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
|
endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
|
||||||
|
|
||||||
include(FindPackageHandleStandardArgs)
|
include(FindPackageHandleStandardArgs)
|
||||||
find_package_handle_standard_args(ATLAS DEFAULT_MSG
|
find_package_handle_standard_args(ATLAS DEFAULT_MSG
|
||||||
|
@ -15,6 +15,10 @@ find_path(GOTO_INCLUDES
|
|||||||
find_file(GOTO_LIBRARIES libgotoblas.so PATHS /usr/lib $ENV{GOTODIR} ${LIB_INSTALL_DIR})
|
find_file(GOTO_LIBRARIES libgotoblas.so PATHS /usr/lib $ENV{GOTODIR} ${LIB_INSTALL_DIR})
|
||||||
find_library(GOTO_LIBRARIES gotoblas PATHS $ENV{GOTODIR} ${LIB_INSTALL_DIR})
|
find_library(GOTO_LIBRARIES gotoblas PATHS $ENV{GOTODIR} ${LIB_INSTALL_DIR})
|
||||||
|
|
||||||
|
if(GOTO_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX)
|
||||||
|
set(GOTO_LIBRARIES ${GOTO_LIBRARIES} "-lpthread")
|
||||||
|
endif(GOTO_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX)
|
||||||
|
|
||||||
include(FindPackageHandleStandardArgs)
|
include(FindPackageHandleStandardArgs)
|
||||||
find_package_handle_standard_args(GOTO DEFAULT_MSG
|
find_package_handle_standard_args(GOTO DEFAULT_MSG
|
||||||
GOTO_INCLUDES GOTO_LIBRARIES)
|
GOTO_INCLUDES GOTO_LIBRARIES)
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:1024
|
aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:2048
|
||||||
ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:1024
|
ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:2048
|
||||||
atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:1024
|
atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:2048
|
||||||
axpby ; "{/*1.5 Y = alpha * X + beta * Y}" ; "vector size" ; 5:1000000
|
axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000
|
||||||
axpy ; "{/*1.5 Y += alpha * X}" ; "vector size" ; 5:1000000
|
axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000
|
||||||
matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:1024
|
matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:2048
|
||||||
matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:1024
|
matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:2048
|
||||||
trisolve ; "{/*1.5 triangular solver (X = inv(L) * X)}" ; "size" ; 4:1024
|
trisolve ; "{/*1.5 triangular solver (X = inv(L) X)}" ; "size" ; 4:2048
|
||||||
cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:1024
|
cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:2048
|
||||||
lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:1024
|
lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:2048
|
||||||
tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:1024
|
tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:2048
|
||||||
hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:1024
|
hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:2048
|
||||||
|
symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:2048
|
||||||
|
syr2 ; "{/*1.5 symmetric rank-2 update (A += u^T v + u v^T)}" ; "matrix size" ; 4:2048
|
@ -1,7 +1,20 @@
|
|||||||
#! /bin/bash
|
#! /bin/bash
|
||||||
|
|
||||||
|
if [ $# < 1 ]; then
|
||||||
|
echo "Usage: $0 working_directory [tiny|large [prefix]]"
|
||||||
|
else
|
||||||
|
|
||||||
mkdir -p $1
|
mkdir -p $1
|
||||||
##cp ../libs/*/*.dat $1
|
##cp ../libs/*/*.dat $1
|
||||||
|
|
||||||
|
mode=large
|
||||||
|
if [ $# > 2 ]; then
|
||||||
|
mode=$2
|
||||||
|
fi
|
||||||
|
if [ $# > 3 ]; then
|
||||||
|
prefix=$3
|
||||||
|
fi
|
||||||
|
|
||||||
EIGENDIR=`cat eigen_root_dir.txt`
|
EIGENDIR=`cat eigen_root_dir.txt`
|
||||||
|
|
||||||
webpagefilename=$1/index.html
|
webpagefilename=$1/index.html
|
||||||
@ -18,19 +31,22 @@ echo '<ul>'\
|
|||||||
'</ul>' \
|
'</ul>' \
|
||||||
'</p>' >> $webpagefilename
|
'</p>' >> $webpagefilename
|
||||||
|
|
||||||
source mk_mean_script.sh axpy $1 11 2500 100000 250000 $2
|
source mk_mean_script.sh axpy $1 11 2500 100000 250000 $mode $prefix
|
||||||
source mk_mean_script.sh axpby $1 11 2500 100000 250000 $2
|
source mk_mean_script.sh axpby $1 11 2500 100000 250000 $mode $prefix
|
||||||
source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $2
|
source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $mode $prefix
|
||||||
source mk_mean_script.sh atv $1 11 50 300 1000 $2
|
source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix
|
||||||
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $2
|
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix
|
||||||
source mk_mean_script.sh aat $1 11 100 300 1000 $2
|
source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix
|
||||||
source mk_mean_script.sh ata $1 11 100 300 1000 $2
|
source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix
|
||||||
source mk_mean_script.sh trisolve $1 11 100 300 1000 $2
|
source mk_mean_script.sh trisolve $1 11 100 300 1000 $mode $prefix
|
||||||
source mk_mean_script.sh cholesky $1 11 100 300 1000 $2
|
source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix
|
||||||
source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $2
|
source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $mode $prefix
|
||||||
source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $2
|
source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $mode $prefix
|
||||||
source mk_mean_script.sh hessenberg $1 11 100 300 1000 $2
|
source mk_mean_script.sh hessenberg $1 11 100 300 1000 $mode $prefix
|
||||||
|
source mk_mean_script.sh symv $1 11 50 300 1000 $mode $prefix
|
||||||
|
source mk_mean_script.sh syr2 $1 11 50 300 1000 $mode $prefix
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
## compile the web page ##
|
## compile the web page ##
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ MINIC=$3
|
|||||||
MAXIC=$4
|
MAXIC=$4
|
||||||
MINOC=$5
|
MINOC=$5
|
||||||
MAXOC=$6
|
MAXOC=$6
|
||||||
|
prefix=$8
|
||||||
|
|
||||||
meanstatsfilename=$2/mean.html
|
meanstatsfilename=$2/mean.html
|
||||||
|
|
||||||
@ -37,7 +38,7 @@ echo '<br/>' >> $meanstatsfilename
|
|||||||
|
|
||||||
webpagefilename=$2/index.html
|
webpagefilename=$2/index.html
|
||||||
# echo '<h3>'${WHAT}'</h3>' >> $webpagefilename
|
# echo '<h3>'${WHAT}'</h3>' >> $webpagefilename
|
||||||
echo '<hr/><a href="/btl/'$1'.pdf"><img src="/btl/'$1'.png" alt="'${WHAT}'" /></a><br/>' >> $webpagefilename
|
echo '<hr/><a href="'$prefix$1'.pdf"><img src="'$prefix$1'.png" alt="'${WHAT}'" /></a><br/>' >> $webpagefilename
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -37,11 +37,11 @@
|
|||||||
// min matrix size for matrix matrix product bench
|
// min matrix size for matrix matrix product bench
|
||||||
#define MIN_MM 5
|
#define MIN_MM 5
|
||||||
// max matrix size for matrix matrix product bench
|
// max matrix size for matrix matrix product bench
|
||||||
#define MAX_MM 2048
|
#define MAX_MM MAX_MV
|
||||||
// min matrix size for LU bench
|
// min matrix size for LU bench
|
||||||
#define MIN_LU 5
|
#define MIN_LU 5
|
||||||
// max matrix size for LU bench
|
// max matrix size for LU bench
|
||||||
#define MAX_LU 1024
|
#define MAX_LU 2048
|
||||||
// max size for tiny vector and matrix
|
// max size for tiny vector and matrix
|
||||||
#define TINY_MV_MAX_SIZE 16
|
#define TINY_MV_MAX_SIZE 16
|
||||||
// default nb_sample for x86 timer
|
// default nb_sample for x86 timer
|
||||||
|
@ -169,7 +169,7 @@ class BtlConfig
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
BtlConfig()
|
BtlConfig()
|
||||||
: overwriteResults(false)
|
: overwriteResults(false), checkResults(true)
|
||||||
{
|
{
|
||||||
char * _config;
|
char * _config;
|
||||||
_config = getenv ("BTL_CONFIG");
|
_config = getenv ("BTL_CONFIG");
|
||||||
@ -193,6 +193,10 @@ public:
|
|||||||
{
|
{
|
||||||
Instance.overwriteResults = true;
|
Instance.overwriteResults = true;
|
||||||
}
|
}
|
||||||
|
else if (config[i].beginsWith("--nocheck"))
|
||||||
|
{
|
||||||
|
Instance.checkResults = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -214,6 +218,7 @@ public:
|
|||||||
|
|
||||||
static BtlConfig Instance;
|
static BtlConfig Instance;
|
||||||
bool overwriteResults;
|
bool overwriteResults;
|
||||||
|
bool checkResults;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::vector<BtlString> m_selectedActionNames;
|
std::vector<BtlString> m_selectedActionNames;
|
||||||
|
@ -65,9 +65,12 @@ public:
|
|||||||
time_action = time_action / (double(_nb_calc));
|
time_action = time_action / (double(_nb_calc));
|
||||||
|
|
||||||
// check
|
// check
|
||||||
action.initialize();
|
if (BtlConfig::Instance.checkResults)
|
||||||
action.calculate();
|
{
|
||||||
action.check_result();
|
action.initialize();
|
||||||
|
action.calculate();
|
||||||
|
action.check_result();
|
||||||
|
}
|
||||||
return action.nb_op_base()/(time_action*1000000.0);
|
return action.nb_op_base()/(time_action*1000000.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -132,7 +132,7 @@ static char notrans = 'N';
|
|||||||
static char trans = 'T';
|
static char trans = 'T';
|
||||||
static char nonunit = 'N';
|
static char nonunit = 'N';
|
||||||
static char lower = 'L';
|
static char lower = 'L';
|
||||||
static blasint intone = 1;
|
static int intone = 1;
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
class C_BLAS_interface<float> : public f77_interface_base<float>
|
class C_BLAS_interface<float> : public f77_interface_base<float>
|
||||||
@ -160,6 +160,14 @@ public :
|
|||||||
cblas_ssymv(CblasColMajor,CblasLower,N,1.0,A,N,B,1,0.0,X,1);
|
cblas_ssymv(CblasColMajor,CblasLower,N,1.0,A,N,B,1,0.0,X,1);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||||
|
#ifdef PUREBLAS
|
||||||
|
ssyr2_(&lower,&N,&fone,B,&intone,X,&intone,A,&N);
|
||||||
|
#else
|
||||||
|
cblas_ssyr2(CblasColMajor,CblasLower,N,1.0,B,1,X,1,A,N);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||||
#ifdef PUREBLAS
|
#ifdef PUREBLAS
|
||||||
|
@ -41,6 +41,7 @@ int main()
|
|||||||
bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_symv<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_symv<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
|
bench<Action_syr2<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
|
|
||||||
bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
@ -146,6 +146,15 @@ public :
|
|||||||
X[j] += t2;
|
X[j] += t2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
||||||
|
{
|
||||||
|
for (int j=0; j<N; ++j)
|
||||||
|
{
|
||||||
|
for (int i=j; i<N; ++i)
|
||||||
|
A[j][i] += B[i]*X[j] + B[j]*X[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
||||||
{
|
{
|
||||||
|
@ -31,6 +31,7 @@ int main()
|
|||||||
bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_symv<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_symv<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
|
bench<Action_syr2<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
bench<Action_ata_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_ata_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
bench<Action_aat_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_aat_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
@ -7,7 +7,7 @@ if (EIGEN2_FOUND)
|
|||||||
btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp)
|
btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp)
|
||||||
btl_add_bench(btl_eigen2_matmat main_matmat.cpp)
|
btl_add_bench(btl_eigen2_matmat main_matmat.cpp)
|
||||||
btl_add_bench(btl_eigen2_adv main_adv.cpp)
|
btl_add_bench(btl_eigen2_adv main_adv.cpp)
|
||||||
|
|
||||||
IF(NOT BTL_NOVEC)
|
IF(NOT BTL_NOVEC)
|
||||||
btl_add_bench(btl_eigen2_novec_linear main_linear.cpp)
|
btl_add_bench(btl_eigen2_novec_linear main_linear.cpp)
|
||||||
btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp)
|
btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp)
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
#ifndef EIGEN2_INTERFACE_HH
|
#ifndef EIGEN2_INTERFACE_HH
|
||||||
#define EIGEN2_INTERFACE_HH
|
#define EIGEN2_INTERFACE_HH
|
||||||
// #include <cblas.h>
|
// #include <cblas.h>
|
||||||
#include <Eigen/Core>
|
#include <Eigen/Array>
|
||||||
#include <Eigen/Cholesky>
|
#include <Eigen/Cholesky>
|
||||||
#include <Eigen/LU>
|
#include <Eigen/LU>
|
||||||
#include <Eigen/QR>
|
#include <Eigen/QR>
|
||||||
@ -45,7 +45,9 @@ public :
|
|||||||
|
|
||||||
static inline std::string name( void )
|
static inline std::string name( void )
|
||||||
{
|
{
|
||||||
#if defined(EIGEN_VECTORIZE_SSE)
|
#if defined(EIGEN_USE_NEW_PRODUCT)
|
||||||
|
if (SIZE==Dynamic) return "eigen2_newprod"; else return "tiny_eigen2";
|
||||||
|
#elif defined(EIGEN_VECTORIZE_SSE)
|
||||||
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
|
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
|
||||||
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
|
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
|
||||||
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
|
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
|
||||||
@ -114,7 +116,57 @@ public :
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){
|
static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){
|
||||||
X = (A.template marked<SelfAdjoint|LowerTriangular>() * B)/*.lazy()*/;
|
//X = (A.template marked<SelfAdjoint|LowerTriangular>() * B)/*.lazy()*/;
|
||||||
|
ei_product_selfadjoint_vector<real,0,LowerTriangularBit>(N,A.data(),N, B.data(), X.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Dest, typename Src> static void triassign(Dest& dst, const Src& src)
|
||||||
|
{
|
||||||
|
typedef typename Dest::Scalar Scalar;
|
||||||
|
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||||
|
const int PacketSize = sizeof(Packet)/sizeof(Scalar);
|
||||||
|
int size = dst.cols();
|
||||||
|
for(int j=0; j<size; j+=1)
|
||||||
|
{
|
||||||
|
// const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
|
||||||
|
Scalar* A0 = dst.data() + j*dst.stride();
|
||||||
|
int starti = j;
|
||||||
|
int alignedEnd = starti;
|
||||||
|
int alignedStart = (starti) + ei_alignmentOffset(&A0[starti], size-starti);
|
||||||
|
alignedEnd = alignedStart + ((size-alignedStart)/(2*PacketSize))*(PacketSize*2);
|
||||||
|
|
||||||
|
// do the non-vectorizable part of the assignment
|
||||||
|
for (int index = starti; index<alignedStart ; ++index)
|
||||||
|
{
|
||||||
|
if(Dest::Flags&RowMajorBit)
|
||||||
|
dst.copyCoeff(j, index, src);
|
||||||
|
else
|
||||||
|
dst.copyCoeff(index, j, src);
|
||||||
|
}
|
||||||
|
|
||||||
|
// do the vectorizable part of the assignment
|
||||||
|
for (int index = alignedStart; index<alignedEnd; index+=PacketSize)
|
||||||
|
{
|
||||||
|
if(Dest::Flags&RowMajorBit)
|
||||||
|
dst.template copyPacket<Src, Aligned, Unaligned>(j, index, src);
|
||||||
|
else
|
||||||
|
dst.template copyPacket<Src, Aligned, Unaligned>(index, j, src);
|
||||||
|
}
|
||||||
|
|
||||||
|
// do the non-vectorizable part of the assignment
|
||||||
|
for (int index = alignedEnd; index<size; ++index)
|
||||||
|
{
|
||||||
|
if(Dest::Flags&RowMajorBit)
|
||||||
|
dst.copyCoeff(j, index, src);
|
||||||
|
else
|
||||||
|
dst.copyCoeff(index, j, src);
|
||||||
|
}
|
||||||
|
//dst.col(j).end(N-j) = src.col(j).end(N-j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static EIGEN_DONT_INLINE void syr2(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){
|
||||||
|
// ei_product_selfadjoint_rank2_update<real,0,LowerTriangularBit>(N,A.data(),N, X.data(), 1, Y.data(), 1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||||
@ -126,7 +178,9 @@ public :
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||||
|
asm("#begin axpby");
|
||||||
Y = a*X + b*Y;
|
Y = a*X + b*Y;
|
||||||
|
asm("#end axpby");
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
||||||
@ -158,7 +212,10 @@ public :
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
|
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
|
||||||
C = Tridiagonalization<gene_matrix>(X).packedMatrix();
|
typename Tridiagonalization<gene_matrix>::CoeffVectorType aux(N-1);
|
||||||
|
C = X;
|
||||||
|
Tridiagonalization<gene_matrix>::_compute(C, aux);
|
||||||
|
// C = Tridiagonalization<gene_matrix>(X).packedMatrix();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
|
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
|
||||||
|
@ -19,7 +19,6 @@
|
|||||||
#include "eigen2_interface.hh"
|
#include "eigen2_interface.hh"
|
||||||
#include "bench.hh"
|
#include "bench.hh"
|
||||||
#include "basic_actions.hh"
|
#include "basic_actions.hh"
|
||||||
#include "action_symv.hh"
|
|
||||||
|
|
||||||
BTL_MAIN;
|
BTL_MAIN;
|
||||||
|
|
||||||
@ -28,6 +27,7 @@ int main()
|
|||||||
bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_symv<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_symv<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
|
bench<Action_syr2<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -38,16 +38,16 @@ public :
|
|||||||
typedef typename f77_interface_base<real>::gene_vector gene_vector;
|
typedef typename f77_interface_base<real>::gene_vector gene_vector;
|
||||||
|
|
||||||
static void free_matrix(gene_matrix & A, int N){
|
static void free_matrix(gene_matrix & A, int N){
|
||||||
ei_aligned_delete(A);
|
ei_aligned_free(A);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void free_vector(gene_vector & B){
|
static void free_vector(gene_vector & B){
|
||||||
ei_aligned_delete(B);
|
ei_aligned_free(B);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
|
static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
|
||||||
int N = A_stl.size();
|
int N = A_stl.size();
|
||||||
A = ei_aligned_new<real>(N*N);
|
A = (real*)ei_aligned_malloc(N*N*sizeof(real));
|
||||||
for (int j=0;j<N;j++)
|
for (int j=0;j<N;j++)
|
||||||
for (int i=0;i<N;i++)
|
for (int i=0;i<N;i++)
|
||||||
A[i+N*j] = A_stl[j][i];
|
A[i+N*j] = A_stl[j][i];
|
||||||
@ -55,7 +55,7 @@ public :
|
|||||||
|
|
||||||
static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
|
static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
|
||||||
int N = B_stl.size();
|
int N = B_stl.size();
|
||||||
B = ei_aligned_new<real>(N);
|
B = (real*)ei_aligned_malloc(N*sizeof(real));
|
||||||
for (int i=0;i<N;i++)
|
for (int i=0;i<N;i++)
|
||||||
B[i] = B_stl[i];
|
B[i] = B_stl[i];
|
||||||
}
|
}
|
||||||
@ -236,6 +236,129 @@ public :
|
|||||||
}
|
}
|
||||||
asm("#end matrix_vector_product");
|
asm("#end matrix_vector_product");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||||
|
{
|
||||||
|
|
||||||
|
// int AN = (N/PacketSize)*PacketSize;
|
||||||
|
// int ANP = (AN/(2*PacketSize))*2*PacketSize;
|
||||||
|
// int bound = (N/4)*4;
|
||||||
|
for (int i=0;i<N;i++)
|
||||||
|
X[i] = 0;
|
||||||
|
|
||||||
|
int bound = std::max(0,N-8) & 0xfffffffE;
|
||||||
|
|
||||||
|
for (int j=0;j<bound;j+=2)
|
||||||
|
{
|
||||||
|
register real* __restrict__ A0 = A + j*N;
|
||||||
|
register real* __restrict__ A1 = A + (j+1)*N;
|
||||||
|
|
||||||
|
real t0 = B[j];
|
||||||
|
Packet ptmp0 = ei_pset1(t0);
|
||||||
|
real t1 = B[j+1];
|
||||||
|
Packet ptmp1 = ei_pset1(t1);
|
||||||
|
|
||||||
|
real t2 = 0;
|
||||||
|
Packet ptmp2 = ei_pset1(t2);
|
||||||
|
real t3 = 0;
|
||||||
|
Packet ptmp3 = ei_pset1(t3);
|
||||||
|
|
||||||
|
int starti = j+2;
|
||||||
|
int alignedEnd = starti;
|
||||||
|
int alignedStart = (starti) + ei_alignmentOffset(&X[starti], N-starti);
|
||||||
|
alignedEnd = alignedStart + ((N-alignedStart)/(PacketSize))*(PacketSize);
|
||||||
|
|
||||||
|
X[j] += t0 * A0[j];
|
||||||
|
X[j+1] += t1 * A1[j];
|
||||||
|
|
||||||
|
X[j+1] += t0 * A0[j+1];
|
||||||
|
t2 += A0[j+1] * B[j+1];
|
||||||
|
|
||||||
|
// alignedStart = alignedEnd;
|
||||||
|
for (int i=starti; i<alignedStart; ++i) {
|
||||||
|
X[i] += t0 * A0[i] + t1 * A1[i];
|
||||||
|
t2 += A0[i] * B[i];
|
||||||
|
t3 += A1[i] * B[i];
|
||||||
|
}
|
||||||
|
asm("#begin symv");
|
||||||
|
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize) {
|
||||||
|
Packet A0i = ei_ploadu(&A0[i]);
|
||||||
|
Packet A1i = ei_ploadu(&A1[i]);
|
||||||
|
// Packet A0i1 = ei_ploadu(&A0[i+PacketSize]);
|
||||||
|
Packet Xi = ei_pload(&X[i]);
|
||||||
|
Packet Bi = ei_pload/*u*/(&B[i]);
|
||||||
|
// Packet Xi1 = ei_pload(&X[i+PacketSize]);
|
||||||
|
// Packet Bi1 = ei_pload/*u*/(&B[i+PacketSize]);
|
||||||
|
Xi = ei_padd(ei_padd(Xi, ei_pmul(ptmp0, A0i)), ei_pmul(ptmp1, A1i));
|
||||||
|
ptmp2 = ei_padd(ptmp2, ei_pmul(A0i, Bi));
|
||||||
|
ptmp3 = ei_padd(ptmp3, ei_pmul(A1i, Bi));
|
||||||
|
// Xi1 = ei_padd(Xi1, ei_pmul(ptmp1, A0i1));
|
||||||
|
// ptmp2 = ei_padd(ptmp2, ei_pmul(A0i1, Bi1));
|
||||||
|
//
|
||||||
|
ei_pstore(&X[i],Xi);
|
||||||
|
// ei_pstore(&X[i+PacketSize],Xi1);
|
||||||
|
// asm(
|
||||||
|
// "prefetchnta 64(%[A0],%[i],4) \n\t"
|
||||||
|
// //"movups (%[A0],%[i],4), %%xmm8 \n\t"
|
||||||
|
// "movsd (%[A0],%[i],4), %%xmm8 \n\t"
|
||||||
|
// "movhps 8(%[A0],%[i],4), %%xmm8 \n\t"
|
||||||
|
// // "movups 16(%[A0],%[i],4), %%xmm9 \n\t"
|
||||||
|
// // "movups 64(%[A0],%[i],4), %%xmm15 \n\t"
|
||||||
|
// "movaps (%[B], %[i],4), %%xmm12 \n\t"
|
||||||
|
// // "movaps 16(%[B], %[i],4), %%xmm13 \n\t"
|
||||||
|
// "movaps (%[X], %[i],4), %%xmm10 \n\t"
|
||||||
|
// // "movaps 16(%[X], %[i],4), %%xmm11 \n\t"
|
||||||
|
//
|
||||||
|
// "mulps %%xmm8, %%xmm12 \n\t"
|
||||||
|
// // "mulps %%xmm9, %%xmm13 \n\t"
|
||||||
|
//
|
||||||
|
// "mulps %[ptmp1], %%xmm8 \n\t"
|
||||||
|
// "addps %%xmm12, %[ptmp2] \n\t"
|
||||||
|
// "addps %%xmm8, %%xmm10 \n\t"
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// // "mulps %[ptmp1], %%xmm9 \n\t"
|
||||||
|
//
|
||||||
|
// // "addps %%xmm9, %%xmm11 \n\t"
|
||||||
|
// // "addps %%xmm13, %[ptmp2] \n\t"
|
||||||
|
//
|
||||||
|
// "movaps %%xmm10, (%[X],%[i],4) \n\t"
|
||||||
|
// // "movaps %%xmm11, 16(%[X],%[i],4) \n\t"
|
||||||
|
// :
|
||||||
|
// : [X] "r" (X), [i] "r" (i), [A0] "r" (A0),
|
||||||
|
// [B] "r" (B),
|
||||||
|
// [ptmp1] "x" (ptmp1),
|
||||||
|
// [ptmp2] "x" (ptmp2)
|
||||||
|
// : "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm15");
|
||||||
|
}
|
||||||
|
asm("#end symv");
|
||||||
|
for (int i=alignedEnd; i<N; i++) {
|
||||||
|
X[i] += t0 * A0[i] + t1 * A1[i];
|
||||||
|
t2 += A0[i] * B[i];
|
||||||
|
t3 += A1[i] * B[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
X[j] += t2 + ei_predux(ptmp2);
|
||||||
|
X[j+1] += t3 + ei_predux(ptmp3);
|
||||||
|
}
|
||||||
|
for (int j=bound;j<N;j++)
|
||||||
|
{
|
||||||
|
register real* __restrict__ A0 = A + j*N;
|
||||||
|
|
||||||
|
real t1 = B[j];
|
||||||
|
real t2 = 0;
|
||||||
|
X[j] += t1 * A0[j];
|
||||||
|
for (int i=j+1; i<N; i+=PacketSize) {
|
||||||
|
X[i] += t1 * A0[i];
|
||||||
|
t2 += A0[i] * B[i];
|
||||||
|
}
|
||||||
|
X[j] += t2;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||||
// {
|
// {
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
#include "action_axpy.hh"
|
#include "action_axpy.hh"
|
||||||
#include "action_ata_product.hh"
|
#include "action_ata_product.hh"
|
||||||
#include "action_aat_product.hh"
|
#include "action_aat_product.hh"
|
||||||
|
#include "basic_actions.hh"
|
||||||
//#include "action_lu_solve.hh"
|
//#include "action_lu_solve.hh"
|
||||||
// #include "timers/mixed_perf_analyzer.hh"
|
// #include "timers/mixed_perf_analyzer.hh"
|
||||||
|
|
||||||
@ -39,6 +40,7 @@ int main()
|
|||||||
// bench<Action_matrix_matrix_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
// bench<Action_matrix_matrix_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
// bench<Action_aat_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
// bench<Action_aat_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
// bench<Action_ata_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
// bench<Action_ata_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
bench<Action_symv<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
bench<Action_axpy<hand_vec_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
bench<Action_axpy<hand_vec_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user