diff --git a/bench/btl/README b/bench/btl/README index 787002f9a..f3f5fb36f 100644 --- a/bench/btl/README +++ b/bench/btl/README @@ -43,10 +43,10 @@ Finally, if bench results already exist (the bench*.dat files) then they merges BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2 4 : Analyze the result. different data files (.dat) are produced in each libs directories. - If gnuplot is available, choose a directory name in the data directory to store the results and type - cd data - mkdir my_directory - cp ../libs/*/*.dat my_directory + If gnuplot is available, choose a directory name in the data directory to store the results and type: + $ cd data + $ mkdir my_directory + $ cp ../libs/*/*.dat my_directory Build the data utilities in this (data) directory make Then you can look the raw data, diff --git a/bench/btl/actions/basic_actions.hh b/bench/btl/actions/basic_actions.hh index 1e6e420f7..a23e58096 100644 --- a/bench/btl/actions/basic_actions.hh +++ b/bench/btl/actions/basic_actions.hh @@ -12,7 +12,7 @@ #include "action_trisolve.hh" #include "action_symv.hh" -#include "action_symm.hh" +// #include "action_symm.hh" #include "action_syr2.hh" // #include "action_lu_solve.hh" diff --git a/bench/btl/cmake/FindATLAS.cmake b/bench/btl/cmake/FindATLAS.cmake index bba350ba7..b4a984abe 100644 --- a/bench/btl/cmake/FindATLAS.cmake +++ b/bench/btl/cmake/FindATLAS.cmake @@ -15,23 +15,25 @@ find_path(ATLAS_INCLUDES find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -# find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -# find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -# find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib 
$ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -# find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -# find_file(ATLAS_LAPACK liblapack.so.3 PATHS /usr/lib/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -# find_library(ATLAS_LAPACK lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +if(NOT ATLAS_LAPACK) + find_file(ATLAS_LAPACK liblapack.so.3 PATHS /usr/lib/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) + find_library(ATLAS_LAPACK lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +endif(NOT ATLAS_LAPACK) -# find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -# find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -# if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) -set(ATLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_LAPACK} -# ${ATLAS_CBLAS} ${ATLAS_LAPACK} ${ATLAS_F77BLAS} -) -# endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) +if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) + + set(ATLAS_LIBRARIES ${ATLAS_LAPACK} ${ATLAS_CBLAS} ${ATLAS_F77BLAS} ${ATLAS_LIB}) + +endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(ATLAS DEFAULT_MSG diff --git a/bench/btl/cmake/FindGOTO.cmake b/bench/btl/cmake/FindGOTO.cmake index b2b648b14..ad7eb3200 100644 --- a/bench/btl/cmake/FindGOTO.cmake +++ b/bench/btl/cmake/FindGOTO.cmake @@ -15,6 +15,10 @@ find_path(GOTO_INCLUDES find_file(GOTO_LIBRARIES libgotoblas.so PATHS /usr/lib $ENV{GOTODIR} ${LIB_INSTALL_DIR}) find_library(GOTO_LIBRARIES gotoblas PATHS $ENV{GOTODIR} ${LIB_INSTALL_DIR}) +if(GOTO_LIBRARIES 
AND CMAKE_COMPILER_IS_GNUCXX) + set(GOTO_LIBRARIES ${GOTO_LIBRARIES} "-lpthread") +endif(GOTO_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX) + include(FindPackageHandleStandardArgs) find_package_handle_standard_args(GOTO DEFAULT_MSG GOTO_INCLUDES GOTO_LIBRARIES) diff --git a/bench/btl/data/action_settings.txt b/bench/btl/data/action_settings.txt index 26557279b..5e88cee99 100644 --- a/bench/btl/data/action_settings.txt +++ b/bench/btl/data/action_settings.txt @@ -1,12 +1,14 @@ -aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:1024 -ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:1024 -atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:1024 -axpby ; "{/*1.5 Y = alpha * X + beta * Y}" ; "vector size" ; 5:1000000 -axpy ; "{/*1.5 Y += alpha * X}" ; "vector size" ; 5:1000000 -matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:1024 -matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:1024 -trisolve ; "{/*1.5 triangular solver (X = inv(L) * X)}" ; "size" ; 4:1024 -cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:1024 -lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:1024 -tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:1024 -hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:1024 \ No newline at end of file +aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:2048 +ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:2048 +atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:2048 +axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000 +axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000 +matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:2048 +matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:2048 +trisolve ; "{/*1.5 triangular solver (X = inv(L) X)}" ; "size" ; 4:2048 +cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:2048 +lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:2048 +tridiagonalization ; "{/*1.5 Tridiagonalization}" ; 
"matrix size" ; 4:2048 +hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:2048 +symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:2048 +syr2 ; "{/*1.5 symmetric rank-2 update (A += u^T v + u v^T)}" ; "matrix size" ; 4:2048 \ No newline at end of file diff --git a/bench/btl/data/go_mean b/bench/btl/data/go_mean index f8edf43db..71cca6126 100755 --- a/bench/btl/data/go_mean +++ b/bench/btl/data/go_mean @@ -1,7 +1,20 @@ #! /bin/bash + +if [ $# -lt 1 ]; then + echo "Usage: $0 working_directory [tiny|large [prefix]]" +else + mkdir -p $1 ##cp ../libs/*/*.dat $1 +mode=large +if [ $# -ge 2 ]; then + mode=$2 +fi +if [ $# -ge 3 ]; then + prefix=$3 +fi + EIGENDIR=`cat eigen_root_dir.txt` webpagefilename=$1/index.html @@ -18,19 +31,22 @@ echo '' \ '

' >> $webpagefilename -source mk_mean_script.sh axpy $1 11 2500 100000 250000 $2 -source mk_mean_script.sh axpby $1 11 2500 100000 250000 $2 -source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $2 -source mk_mean_script.sh atv $1 11 50 300 1000 $2 -source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $2 -source mk_mean_script.sh aat $1 11 100 300 1000 $2 -source mk_mean_script.sh ata $1 11 100 300 1000 $2 -source mk_mean_script.sh trisolve $1 11 100 300 1000 $2 -source mk_mean_script.sh cholesky $1 11 100 300 1000 $2 -source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $2 -source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $2 -source mk_mean_script.sh hessenberg $1 11 100 300 1000 $2 +source mk_mean_script.sh axpy $1 11 2500 100000 250000 $mode $prefix +source mk_mean_script.sh axpby $1 11 2500 100000 250000 $mode $prefix +source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $mode $prefix +source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix +source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh trisolve $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh hessenberg $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh symv $1 11 50 300 1000 $mode $prefix +source mk_mean_script.sh syr2 $1 11 50 300 1000 $mode $prefix +fi ## compile the web page ## diff --git a/bench/btl/data/mk_mean_script.sh b/bench/btl/data/mk_mean_script.sh index baa0fd9df..43bab559a 100644 --- a/bench/btl/data/mk_mean_script.sh +++ b/bench/btl/data/mk_mean_script.sh @@ -5,6 +5,7 @@ MINIC=$3 MAXIC=$4 MINOC=$5 MAXOC=$6 +prefix=$8 meanstatsfilename=$2/mean.html @@ -37,7 
+38,7 @@ echo '
' >> $meanstatsfilename webpagefilename=$2/index.html # echo '

'${WHAT}'

' >> $webpagefilename -echo '
'${WHAT}'
' >> $webpagefilename +echo '
'${WHAT}'
' >> $webpagefilename diff --git a/bench/btl/generic_bench/bench_parameter.hh b/bench/btl/generic_bench/bench_parameter.hh index 1295b374d..08fea80e4 100644 --- a/bench/btl/generic_bench/bench_parameter.hh +++ b/bench/btl/generic_bench/bench_parameter.hh @@ -37,11 +37,11 @@ // min matrix size for matrix matrix product bench #define MIN_MM 5 // max matrix size for matrix matrix product bench -#define MAX_MM 2048 +#define MAX_MM MAX_MV // min matrix size for LU bench #define MIN_LU 5 // max matrix size for LU bench -#define MAX_LU 1024 +#define MAX_LU 2048 // max size for tiny vector and matrix #define TINY_MV_MAX_SIZE 16 // default nb_sample for x86 timer diff --git a/bench/btl/generic_bench/btl.hh b/bench/btl/generic_bench/btl.hh index 38e2c5f45..fdc099296 100644 --- a/bench/btl/generic_bench/btl.hh +++ b/bench/btl/generic_bench/btl.hh @@ -169,7 +169,7 @@ class BtlConfig { public: BtlConfig() - : overwriteResults(false) + : overwriteResults(false), checkResults(true) { char * _config; _config = getenv ("BTL_CONFIG"); @@ -193,6 +193,10 @@ public: { Instance.overwriteResults = true; } + else if (config[i].beginsWith("--nocheck")) + { + Instance.checkResults = false; + } } } @@ -214,6 +218,7 @@ public: static BtlConfig Instance; bool overwriteResults; + bool checkResults; protected: std::vector m_selectedActionNames; diff --git a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh index d0fe95ce0..67d3378fc 100644 --- a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh +++ b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh @@ -65,9 +65,12 @@ public: time_action = time_action / (double(_nb_calc)); // check - action.initialize(); - action.calculate(); - action.check_result(); + if (BtlConfig::Instance.checkResults) + { + action.initialize(); + action.calculate(); + action.check_result(); + } return action.nb_op_base()/(time_action*1000000.0); } diff --git 
a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh index 319658c6b..a726fa89d 100644 --- a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh +++ b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh @@ -132,7 +132,7 @@ static char notrans = 'N'; static char trans = 'T'; static char nonunit = 'N'; static char lower = 'L'; -static blasint intone = 1; +static int intone = 1; template<> class C_BLAS_interface : public f77_interface_base @@ -160,6 +160,14 @@ public : cblas_ssymv(CblasColMajor,CblasLower,N,1.0,A,N,B,1,0.0,X,1); #endif } + + static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + #ifdef PUREBLAS + ssyr2_(&lower,&N,&fone,B,&intone,X,&intone,A,&N); + #else + cblas_ssyr2(CblasColMajor,CblasLower,N,1.0,B,1,X,1,A,N); + #endif + } static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ #ifdef PUREBLAS diff --git a/bench/btl/libs/C_BLAS/main.cpp b/bench/btl/libs/C_BLAS/main.cpp index 1eee55077..57cb9930e 100644 --- a/bench/btl/libs/C_BLAS/main.cpp +++ b/bench/btl/libs/C_BLAS/main.cpp @@ -41,6 +41,7 @@ int main() bench > >(MIN_MV,MAX_MV,NB_POINT); bench > >(MIN_MV,MAX_MV,NB_POINT); bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); bench > >(MIN_MM,MAX_MM,NB_POINT); bench > >(MIN_MM,MAX_MM,NB_POINT); diff --git a/bench/btl/libs/STL/STL_interface.hh b/bench/btl/libs/STL/STL_interface.hh index 9dd9b8ed4..3958d4af5 100644 --- a/bench/btl/libs/STL/STL_interface.hh +++ b/bench/btl/libs/STL/STL_interface.hh @@ -146,6 +146,15 @@ public : X[j] += t2; } } + + static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N) + { + for (int j=0; j > >(MIN_MV,MAX_MV,NB_POINT); bench > >(MIN_MV,MAX_MV,NB_POINT); bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); bench > >(MIN_MM,MAX_MM,NB_POINT); bench > >(MIN_MM,MAX_MM,NB_POINT); bench > >(MIN_MM,MAX_MM,NB_POINT); diff --git a/bench/btl/libs/eigen2/CMakeLists.txt 
b/bench/btl/libs/eigen2/CMakeLists.txt index f061a27de..beb37f79b 100644 --- a/bench/btl/libs/eigen2/CMakeLists.txt +++ b/bench/btl/libs/eigen2/CMakeLists.txt @@ -7,7 +7,7 @@ if (EIGEN2_FOUND) btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp) btl_add_bench(btl_eigen2_matmat main_matmat.cpp) btl_add_bench(btl_eigen2_adv main_adv.cpp) - + IF(NOT BTL_NOVEC) btl_add_bench(btl_eigen2_novec_linear main_linear.cpp) btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp) diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh index 2b463f017..92a5677d3 100644 --- a/bench/btl/libs/eigen2/eigen2_interface.hh +++ b/bench/btl/libs/eigen2/eigen2_interface.hh @@ -18,7 +18,7 @@ #ifndef EIGEN2_INTERFACE_HH #define EIGEN2_INTERFACE_HH // #include -#include +#include #include #include #include @@ -45,7 +45,9 @@ public : static inline std::string name( void ) { - #if defined(EIGEN_VECTORIZE_SSE) + #if defined(EIGEN_USE_NEW_PRODUCT) + if (SIZE==Dynamic) return "eigen2_newprod"; else return "tiny_eigen2"; + #elif defined(EIGEN_VECTORIZE_SSE) if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2"; #elif defined(EIGEN_VECTORIZE_ALTIVEC) if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2"; @@ -114,7 +116,57 @@ public : } static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){ - X = (A.template marked() * B)/*.lazy()*/; + //X = (A.template marked() * B)/*.lazy()*/; + ei_product_selfadjoint_vector(N,A.data(),N, B.data(), X.data()); + } + + template static void triassign(Dest& dst, const Src& src) + { + typedef typename Dest::Scalar Scalar; + typedef typename ei_packet_traits::type Packet; + const int PacketSize = sizeof(Packet)/sizeof(Scalar); + int size = dst.cols(); + for(int j=0; j(j, index, src); + else + dst.template copyPacket(index, j, src); + } + + // do the non-vectorizable part of the assignment + for (int index = alignedEnd; index(N,A.data(),N, X.data(), 1, Y.data(), 1, -1); } static 
inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ @@ -126,7 +178,9 @@ public : } static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + asm("#begin axpby"); Y = a*X + b*Y; + asm("#end axpby"); } static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ @@ -158,7 +212,10 @@ public : } static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ - C = Tridiagonalization(X).packedMatrix(); + typename Tridiagonalization::CoeffVectorType aux(N-1); + C = X; + Tridiagonalization::_compute(C, aux); +// C = Tridiagonalization(X).packedMatrix(); } static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){ diff --git a/bench/btl/libs/eigen2/main_vecmat.cpp b/bench/btl/libs/eigen2/main_vecmat.cpp index 881d90e2a..fb00d6f79 100644 --- a/bench/btl/libs/eigen2/main_vecmat.cpp +++ b/bench/btl/libs/eigen2/main_vecmat.cpp @@ -19,7 +19,6 @@ #include "eigen2_interface.hh" #include "bench.hh" #include "basic_actions.hh" -#include "action_symv.hh" BTL_MAIN; @@ -28,6 +27,7 @@ int main() bench > >(MIN_MV,MAX_MV,NB_POINT); bench > >(MIN_MV,MAX_MV,NB_POINT); bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); return 0; } diff --git a/bench/btl/libs/hand_vec/hand_vec_interface.hh b/bench/btl/libs/hand_vec/hand_vec_interface.hh index 4e7d549ce..6080b2460 100755 --- a/bench/btl/libs/hand_vec/hand_vec_interface.hh +++ b/bench/btl/libs/hand_vec/hand_vec_interface.hh @@ -38,16 +38,16 @@ public : typedef typename f77_interface_base::gene_vector gene_vector; static void free_matrix(gene_matrix & A, int N){ - ei_aligned_delete(A); + ei_aligned_free(A); } static void free_vector(gene_vector & B){ - ei_aligned_delete(B); + ei_aligned_free(B); } static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ int N = A_stl.size(); - A = ei_aligned_new(N*N); + A = (real*)ei_aligned_malloc(N*N*sizeof(real)); for (int j=0;j(N); + B = 
(real*)ei_aligned_malloc(N*sizeof(real)); for (int i=0;i > >(MIN_MM,MAX_MM,NB_POINT); // bench > >(MIN_MM,MAX_MM,NB_POINT); // bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); bench > >(MIN_AXPY,MAX_AXPY,NB_POINT);