diff --git a/bench/btl/README b/bench/btl/README
index 787002f9a..f3f5fb36f 100644
--- a/bench/btl/README
+++ b/bench/btl/README
@@ -43,10 +43,10 @@ Finally, if bench results already exist (the bench*.dat files) then they merges
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
4 : Analyze the result. different data files (.dat) are produced in each libs directories.
- If gnuplot is available, choose a directory name in the data directory to store the results and type
- cd data
- mkdir my_directory
- cp ../libs/*/*.dat my_directory
+ If gnuplot is available, choose a directory name in the data directory to store the results and type:
+ $ cd data
+ $ mkdir my_directory
+ $ cp ../libs/*/*.dat my_directory
Build the data utilities in this (data) directory
make
Then you can look the raw data,
diff --git a/bench/btl/actions/basic_actions.hh b/bench/btl/actions/basic_actions.hh
index 1e6e420f7..a23e58096 100644
--- a/bench/btl/actions/basic_actions.hh
+++ b/bench/btl/actions/basic_actions.hh
@@ -12,7 +12,7 @@
#include "action_trisolve.hh"
#include "action_symv.hh"
-#include "action_symm.hh"
+// #include "action_symm.hh"
#include "action_syr2.hh"
// #include "action_lu_solve.hh"
diff --git a/bench/btl/cmake/FindATLAS.cmake b/bench/btl/cmake/FindATLAS.cmake
index bba350ba7..b4a984abe 100644
--- a/bench/btl/cmake/FindATLAS.cmake
+++ b/bench/btl/cmake/FindATLAS.cmake
@@ -15,23 +15,25 @@ find_path(ATLAS_INCLUDES
find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-# find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-# find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-# find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-# find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-# find_file(ATLAS_LAPACK liblapack.so.3 PATHS /usr/lib/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-# find_library(ATLAS_LAPACK lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+if(NOT ATLAS_LAPACK) # the lapack_atlas lookups above found nothing: fall back to a library simply named lapack
+ find_file(ATLAS_LAPACK liblapack.so.3 PATHS /usr/lib/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+ find_library(ATLAS_LAPACK lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+endif(NOT ATLAS_LAPACK)
-# find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-# find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
-# if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
-set(ATLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_LAPACK}
-# ${ATLAS_CBLAS} ${ATLAS_LAPACK} ${ATLAS_F77BLAS}
-)
-# endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
+if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
+ # ATLAS_LIBRARIES is only defined when all four components were located; it lists lapack, cblas, f77blas, then the core atlas library.
+ set(ATLAS_LIBRARIES ${ATLAS_LAPACK} ${ATLAS_CBLAS} ${ATLAS_F77BLAS} ${ATLAS_LIB})
+
+endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ATLAS DEFAULT_MSG
diff --git a/bench/btl/cmake/FindGOTO.cmake b/bench/btl/cmake/FindGOTO.cmake
index b2b648b14..ad7eb3200 100644
--- a/bench/btl/cmake/FindGOTO.cmake
+++ b/bench/btl/cmake/FindGOTO.cmake
@@ -15,6 +15,10 @@ find_path(GOTO_INCLUDES
find_file(GOTO_LIBRARIES libgotoblas.so PATHS /usr/lib $ENV{GOTODIR} ${LIB_INSTALL_DIR})
find_library(GOTO_LIBRARIES gotoblas PATHS $ENV{GOTODIR} ${LIB_INSTALL_DIR})
+if(GOTO_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX) # library located and the C++ compiler is GNU
+ set(GOTO_LIBRARIES ${GOTO_LIBRARIES} "-lpthread") # append -lpthread to the link list (presumably libgotoblas needs pthreads -- confirm)
+endif(GOTO_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX)
+
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GOTO DEFAULT_MSG
GOTO_INCLUDES GOTO_LIBRARIES)
diff --git a/bench/btl/data/action_settings.txt b/bench/btl/data/action_settings.txt
index 26557279b..5e88cee99 100644
--- a/bench/btl/data/action_settings.txt
+++ b/bench/btl/data/action_settings.txt
@@ -1,12 +1,14 @@
-aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:1024
-ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:1024
-atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:1024
-axpby ; "{/*1.5 Y = alpha * X + beta * Y}" ; "vector size" ; 5:1000000
-axpy ; "{/*1.5 Y += alpha * X}" ; "vector size" ; 5:1000000
-matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:1024
-matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:1024
-trisolve ; "{/*1.5 triangular solver (X = inv(L) * X)}" ; "size" ; 4:1024
-cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:1024
-lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:1024
-tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:1024
-hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:1024
\ No newline at end of file
+aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:2048
+ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:2048
+atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:2048
+axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000
+axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000
+matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:2048
+matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:2048
+trisolve ; "{/*1.5 triangular solver (X = inv(L) X)}" ; "size" ; 4:2048
+cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:2048
+lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:2048
+tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:2048
+hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:2048
+symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:2048
+syr2 ; "{/*1.5 symmetric rank-2 update (A += u v^T + v u^T)}" ; "matrix size" ; 4:2048
\ No newline at end of file
diff --git a/bench/btl/data/go_mean b/bench/btl/data/go_mean
index f8edf43db..71cca6126 100755
--- a/bench/btl/data/go_mean
+++ b/bench/btl/data/go_mean
@@ -1,7 +1,20 @@
#! /bin/bash
+
+if [ $# -lt 1 ]; then  # no working directory given: print the usage line and skip the rest of the script
+ echo "Usage: $0 working_directory [tiny|large [prefix]]"
+else
+
mkdir -p $1
##cp ../libs/*/*.dat $1
+mode=large  # default used when the optional second argument is omitted
+if [ $# -ge 2 ]; then  # optional 2nd argument overrides the mode (tiny|large)
+ mode=$2
+fi
+if [ $# -ge 3 ]; then  # optional 3rd argument is the prefix forwarded to mk_mean_script.sh
+ prefix=$3
+fi
+
EIGENDIR=`cat eigen_root_dir.txt`
webpagefilename=$1/index.html
@@ -18,19 +31,22 @@ echo '
' \
'' >> $webpagefilename
-source mk_mean_script.sh axpy $1 11 2500 100000 250000 $2
-source mk_mean_script.sh axpby $1 11 2500 100000 250000 $2
-source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $2
-source mk_mean_script.sh atv $1 11 50 300 1000 $2
-source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $2
-source mk_mean_script.sh aat $1 11 100 300 1000 $2
-source mk_mean_script.sh ata $1 11 100 300 1000 $2
-source mk_mean_script.sh trisolve $1 11 100 300 1000 $2
-source mk_mean_script.sh cholesky $1 11 100 300 1000 $2
-source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $2
-source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $2
-source mk_mean_script.sh hessenberg $1 11 100 300 1000 $2
+source mk_mean_script.sh axpy $1 11 2500 100000 250000 $mode $prefix
+source mk_mean_script.sh axpby $1 11 2500 100000 250000 $mode $prefix
+source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $mode $prefix
+source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix
+source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix
+source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix
+source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix
+source mk_mean_script.sh trisolve $1 11 100 300 1000 $mode $prefix
+source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix
+source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $mode $prefix
+source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $mode $prefix
+source mk_mean_script.sh hessenberg $1 11 100 300 1000 $mode $prefix
+source mk_mean_script.sh symv $1 11 50 300 1000 $mode $prefix
+source mk_mean_script.sh syr2 $1 11 50 300 1000 $mode $prefix
+fi
## compile the web page ##
diff --git a/bench/btl/data/mk_mean_script.sh b/bench/btl/data/mk_mean_script.sh
index baa0fd9df..43bab559a 100644
--- a/bench/btl/data/mk_mean_script.sh
+++ b/bench/btl/data/mk_mean_script.sh
@@ -5,6 +5,7 @@ MINIC=$3
MAXIC=$4
MINOC=$5
MAXOC=$6
+prefix=$8
meanstatsfilename=$2/mean.html
@@ -37,7 +38,7 @@ echo '
' >> $meanstatsfilename
webpagefilename=$2/index.html
# echo ''${WHAT}'
' >> $webpagefilename
-echo '

' >> $webpagefilename
+echo '

' >> $webpagefilename
diff --git a/bench/btl/generic_bench/bench_parameter.hh b/bench/btl/generic_bench/bench_parameter.hh
index 1295b374d..08fea80e4 100644
--- a/bench/btl/generic_bench/bench_parameter.hh
+++ b/bench/btl/generic_bench/bench_parameter.hh
@@ -37,11 +37,11 @@
// min matrix size for matrix matrix product bench
#define MIN_MM 5
// max matrix size for matrix matrix product bench
-#define MAX_MM 2048
+#define MAX_MM MAX_MV
// min matrix size for LU bench
#define MIN_LU 5
// max matrix size for LU bench
-#define MAX_LU 1024
+#define MAX_LU 2048
// max size for tiny vector and matrix
#define TINY_MV_MAX_SIZE 16
// default nb_sample for x86 timer
diff --git a/bench/btl/generic_bench/btl.hh b/bench/btl/generic_bench/btl.hh
index 38e2c5f45..fdc099296 100644
--- a/bench/btl/generic_bench/btl.hh
+++ b/bench/btl/generic_bench/btl.hh
@@ -169,7 +169,7 @@ class BtlConfig
{
public:
BtlConfig()
- : overwriteResults(false)
+ : overwriteResults(false), checkResults(true)
{
char * _config;
_config = getenv ("BTL_CONFIG");
@@ -193,6 +193,10 @@ public:
{
Instance.overwriteResults = true;
}
+ else if (config[i].beginsWith("--nocheck"))
+ {
+ Instance.checkResults = false;
+ }
}
}
@@ -214,6 +218,7 @@ public:
static BtlConfig Instance;
bool overwriteResults;
+ bool checkResults;
protected:
std::vector m_selectedActionNames;
diff --git a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
index d0fe95ce0..67d3378fc 100644
--- a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
@@ -65,9 +65,12 @@ public:
time_action = time_action / (double(_nb_calc));
// check
- action.initialize();
- action.calculate();
- action.check_result();
+ if (BtlConfig::Instance.checkResults) // verification pass after timing; checkResults defaults to true and is cleared by "--nocheck" in BTL_CONFIG
+ {
+ action.initialize();
+ action.calculate();
+ action.check_result();
+ }
return action.nb_op_base()/(time_action*1000000.0);
}
diff --git a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh
index 319658c6b..a726fa89d 100644
--- a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh
+++ b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh
@@ -132,7 +132,7 @@ static char notrans = 'N';
static char trans = 'T';
static char nonunit = 'N';
static char lower = 'L';
-static blasint intone = 1;
+static int intone = 1;
template<>
class C_BLAS_interface : public f77_interface_base
@@ -160,6 +160,14 @@ public :
cblas_ssymv(CblasColMajor,CblasLower,N,1.0,A,N,B,1,0.0,X,1);
#endif
}
+
+ static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ // symmetric rank-2 update of A (order N) from vectors B and X through single-precision BLAS ssyr2
+ #ifdef PUREBLAS // Fortran-style entry point: every argument is passed by address
+ ssyr2_(&lower,&N,&fone,B,&intone,X,&intone,A,&N); // lower triangle ('L'), unit strides (intone = 1), leading dimension N; fone is presumably 1.0f -- confirm
+ #else // CBLAS entry point: column-major storage, lower triangle, alpha = 1.0, unit strides
+ cblas_ssyr2(CblasColMajor,CblasLower,N,1.0,B,1,X,1,A,N);
+ #endif
+ }
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
#ifdef PUREBLAS
diff --git a/bench/btl/libs/C_BLAS/main.cpp b/bench/btl/libs/C_BLAS/main.cpp
index 1eee55077..57cb9930e 100644
--- a/bench/btl/libs/C_BLAS/main.cpp
+++ b/bench/btl/libs/C_BLAS/main.cpp
@@ -41,6 +41,7 @@ int main()
bench > >(MIN_MV,MAX_MV,NB_POINT);
bench > >(MIN_MV,MAX_MV,NB_POINT);
bench > >(MIN_MV,MAX_MV,NB_POINT);
+ bench > >(MIN_MV,MAX_MV,NB_POINT);
bench > >(MIN_MM,MAX_MM,NB_POINT);
bench > >(MIN_MM,MAX_MM,NB_POINT);
diff --git a/bench/btl/libs/STL/STL_interface.hh b/bench/btl/libs/STL/STL_interface.hh
index 9dd9b8ed4..3958d4af5 100644
--- a/bench/btl/libs/STL/STL_interface.hh
+++ b/bench/btl/libs/STL/STL_interface.hh
@@ -146,6 +146,15 @@ public :
X[j] += t2;
}
}
+
+ static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
+ {
+ for (int j=0; j > >(MIN_MV,MAX_MV,NB_POINT);
bench > >(MIN_MV,MAX_MV,NB_POINT);
bench > >(MIN_MV,MAX_MV,NB_POINT);
+ bench > >(MIN_MV,MAX_MV,NB_POINT);
bench > >(MIN_MM,MAX_MM,NB_POINT);
bench > >(MIN_MM,MAX_MM,NB_POINT);
bench > >(MIN_MM,MAX_MM,NB_POINT);
diff --git a/bench/btl/libs/eigen2/CMakeLists.txt b/bench/btl/libs/eigen2/CMakeLists.txt
index f061a27de..beb37f79b 100644
--- a/bench/btl/libs/eigen2/CMakeLists.txt
+++ b/bench/btl/libs/eigen2/CMakeLists.txt
@@ -7,7 +7,7 @@ if (EIGEN2_FOUND)
btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp)
btl_add_bench(btl_eigen2_matmat main_matmat.cpp)
btl_add_bench(btl_eigen2_adv main_adv.cpp)
-
+
IF(NOT BTL_NOVEC)
btl_add_bench(btl_eigen2_novec_linear main_linear.cpp)
btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp)
diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh
index 2b463f017..92a5677d3 100644
--- a/bench/btl/libs/eigen2/eigen2_interface.hh
+++ b/bench/btl/libs/eigen2/eigen2_interface.hh
@@ -18,7 +18,7 @@
#ifndef EIGEN2_INTERFACE_HH
#define EIGEN2_INTERFACE_HH
// #include
-#include
+#include
#include
#include
#include
@@ -45,7 +45,9 @@ public :
static inline std::string name( void )
{
- #if defined(EIGEN_VECTORIZE_SSE)
+ #if defined(EIGEN_USE_NEW_PRODUCT)
+ if (SIZE==Dynamic) return "eigen2_newprod"; else return "tiny_eigen2";
+ #elif defined(EIGEN_VECTORIZE_SSE)
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
@@ -114,7 +116,57 @@ public :
}
static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){
- X = (A.template marked() * B)/*.lazy()*/;
+ //X = (A.template marked() * B)/*.lazy()*/;
+ ei_product_selfadjoint_vector(N,A.data(),N, B.data(), X.data());
+ }
+
+ template static void triassign(Dest& dst, const Src& src)
+ {
+ typedef typename Dest::Scalar Scalar;
+ typedef typename ei_packet_traits::type Packet;
+ const int PacketSize = sizeof(Packet)/sizeof(Scalar);
+ int size = dst.cols();
+ for(int j=0; j(j, index, src);
+ else
+ dst.template copyPacket(index, j, src);
+ }
+
+ // do the non-vectorizable part of the assignment
+ for (int index = alignedEnd; index(N,A.data(),N, X.data(), 1, Y.data(), 1, -1);
}
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
@@ -126,7 +178,9 @@ public :
}
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+ asm("#begin axpby"); // places the text "#begin axpby" in the compiler's assembly output, marking where the kernel starts
Y = a*X + b*Y;
+ asm("#end axpby"); // matching marker for the end of the kernel
}
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
@@ -158,7 +212,10 @@ public :
}
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
- C = Tridiagonalization(X).packedMatrix();
+ typename Tridiagonalization::CoeffVectorType aux(N-1); // scratch coefficient vector of length N-1 handed to _compute
+ C = X; // copy the input into C first; _compute is then called on C (presumably reducing it in place -- confirm)
+ Tridiagonalization::_compute(C, aux);
+// C = Tridiagonalization(X).packedMatrix();
}
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
diff --git a/bench/btl/libs/eigen2/main_vecmat.cpp b/bench/btl/libs/eigen2/main_vecmat.cpp
index 881d90e2a..fb00d6f79 100644
--- a/bench/btl/libs/eigen2/main_vecmat.cpp
+++ b/bench/btl/libs/eigen2/main_vecmat.cpp
@@ -19,7 +19,6 @@
#include "eigen2_interface.hh"
#include "bench.hh"
#include "basic_actions.hh"
-#include "action_symv.hh"
BTL_MAIN;
@@ -28,6 +27,7 @@ int main()
bench > >(MIN_MV,MAX_MV,NB_POINT);
bench > >(MIN_MV,MAX_MV,NB_POINT);
bench > >(MIN_MV,MAX_MV,NB_POINT);
+ bench > >(MIN_MV,MAX_MV,NB_POINT);
return 0;
}
diff --git a/bench/btl/libs/hand_vec/hand_vec_interface.hh b/bench/btl/libs/hand_vec/hand_vec_interface.hh
index 4e7d549ce..6080b2460 100755
--- a/bench/btl/libs/hand_vec/hand_vec_interface.hh
+++ b/bench/btl/libs/hand_vec/hand_vec_interface.hh
@@ -38,16 +38,16 @@ public :
typedef typename f77_interface_base::gene_vector gene_vector;
static void free_matrix(gene_matrix & A, int N){
- ei_aligned_delete(A);
+ ei_aligned_free(A); // pairs with the ei_aligned_malloc allocation made in matrix_from_stl
}
static void free_vector(gene_vector & B){
- ei_aligned_delete(B);
+ ei_aligned_free(B); // pairs with the ei_aligned_malloc call that allocates vectors in this interface
}
static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
int N = A_stl.size();
- A = ei_aligned_new(N*N);
+ A = (real*)ei_aligned_malloc(N*N*sizeof(real));
for (int j=0;j(N);
+ B = (real*)ei_aligned_malloc(N*sizeof(real));
for (int i=0;i > >(MIN_MM,MAX_MM,NB_POINT);
// bench > >(MIN_MM,MAX_MM,NB_POINT);
// bench > >(MIN_MM,MAX_MM,NB_POINT);
+ bench > >(MIN_MM,MAX_MM,NB_POINT);
bench > >(MIN_AXPY,MAX_AXPY,NB_POINT);