various update of of BTL

This commit is contained in:
Gael Guennebaud 2009-03-04 07:21:17 +00:00
parent 3288e9e168
commit 45136ac3b6
19 changed files with 294 additions and 60 deletions

View File

@ -43,10 +43,10 @@ Finally, if bench results already exist (the bench*.dat files) then they merges
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2 BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
4 : Analyze the result. different data files (.dat) are produced in each libs directories. 4 : Analyze the result. different data files (.dat) are produced in each libs directories.
If gnuplot is available, choose a directory name in the data directory to store the results and type If gnuplot is available, choose a directory name in the data directory to store the results and type:
cd data $ cd data
mkdir my_directory $ mkdir my_directory
cp ../libs/*/*.dat my_directory $ cp ../libs/*/*.dat my_directory
Build the data utilities in this (data) directory Build the data utilities in this (data) directory
make make
Then you can look the raw data, Then you can look the raw data,

View File

@ -12,7 +12,7 @@
#include "action_trisolve.hh" #include "action_trisolve.hh"
#include "action_symv.hh" #include "action_symv.hh"
#include "action_symm.hh" // #include "action_symm.hh"
#include "action_syr2.hh" #include "action_syr2.hh"
// #include "action_lu_solve.hh" // #include "action_lu_solve.hh"

View File

@ -15,23 +15,25 @@ find_path(ATLAS_INCLUDES
find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
# find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
# find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
# find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
# find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
# find_file(ATLAS_LAPACK liblapack.so.3 PATHS /usr/lib/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) if(NOT ATLAS_LAPACK)
# find_library(ATLAS_LAPACK lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_file(ATLAS_LAPACK liblapack.so.3 PATHS /usr/lib/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_LAPACK lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
endif(NOT ATLAS_LAPACK)
# find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
# find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
# if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
set(ATLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_LAPACK}
# ${ATLAS_CBLAS} ${ATLAS_LAPACK} ${ATLAS_F77BLAS} set(ATLAS_LIBRARIES ${ATLAS_LAPACK} ${ATLAS_CBLAS} ${ATLAS_F77BLAS} ${ATLAS_LIB})
)
# endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
include(FindPackageHandleStandardArgs) include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ATLAS DEFAULT_MSG find_package_handle_standard_args(ATLAS DEFAULT_MSG

View File

@ -15,6 +15,10 @@ find_path(GOTO_INCLUDES
find_file(GOTO_LIBRARIES libgotoblas.so PATHS /usr/lib $ENV{GOTODIR} ${LIB_INSTALL_DIR}) find_file(GOTO_LIBRARIES libgotoblas.so PATHS /usr/lib $ENV{GOTODIR} ${LIB_INSTALL_DIR})
find_library(GOTO_LIBRARIES gotoblas PATHS $ENV{GOTODIR} ${LIB_INSTALL_DIR}) find_library(GOTO_LIBRARIES gotoblas PATHS $ENV{GOTODIR} ${LIB_INSTALL_DIR})
if(GOTO_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX)
set(GOTO_LIBRARIES ${GOTO_LIBRARIES} "-lpthread")
endif(GOTO_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX)
include(FindPackageHandleStandardArgs) include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GOTO DEFAULT_MSG find_package_handle_standard_args(GOTO DEFAULT_MSG
GOTO_INCLUDES GOTO_LIBRARIES) GOTO_INCLUDES GOTO_LIBRARIES)

View File

@ -1,12 +1,14 @@
aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:1024 aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:2048
ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:1024 ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:2048
atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:1024 atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:2048
axpby ; "{/*1.5 Y = alpha * X + beta * Y}" ; "vector size" ; 5:1000000 axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000
axpy ; "{/*1.5 Y += alpha * X}" ; "vector size" ; 5:1000000 axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000
matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:1024 matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:2048
matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:1024 matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:2048
trisolve ; "{/*1.5 triangular solver (X = inv(L) * X)}" ; "size" ; 4:1024 trisolve ; "{/*1.5 triangular solver (X = inv(L) X)}" ; "size" ; 4:2048
cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:1024 cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:2048
lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:1024 lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:2048
tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:1024 tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:2048
hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:1024 hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:2048
symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:2048
syr2 ; "{/*1.5 symmetric rank-2 update (A += u^T v + u v^T)}" ; "matrix size" ; 4:2048

View File

@ -1,7 +1,20 @@
#! /bin/bash #! /bin/bash
if [ $# < 1 ]; then
echo "Usage: $0 working_directory [tiny|large [prefix]]"
else
mkdir -p $1 mkdir -p $1
##cp ../libs/*/*.dat $1 ##cp ../libs/*/*.dat $1
mode=large
if [ $# > 2 ]; then
mode=$2
fi
if [ $# > 3 ]; then
prefix=$3
fi
EIGENDIR=`cat eigen_root_dir.txt` EIGENDIR=`cat eigen_root_dir.txt`
webpagefilename=$1/index.html webpagefilename=$1/index.html
@ -18,19 +31,22 @@ echo '<ul>'\
'</ul>' \ '</ul>' \
'</p>' >> $webpagefilename '</p>' >> $webpagefilename
source mk_mean_script.sh axpy $1 11 2500 100000 250000 $2 source mk_mean_script.sh axpy $1 11 2500 100000 250000 $mode $prefix
source mk_mean_script.sh axpby $1 11 2500 100000 250000 $2 source mk_mean_script.sh axpby $1 11 2500 100000 250000 $mode $prefix
source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $2 source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh atv $1 11 50 300 1000 $2 source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $2 source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh aat $1 11 100 300 1000 $2 source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh ata $1 11 100 300 1000 $2 source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trisolve $1 11 100 300 1000 $2 source mk_mean_script.sh trisolve $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh cholesky $1 11 100 300 1000 $2 source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $2 source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $2 source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh hessenberg $1 11 100 300 1000 $2 source mk_mean_script.sh hessenberg $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh symv $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh syr2 $1 11 50 300 1000 $mode $prefix
fi
## compile the web page ## ## compile the web page ##

View File

@ -5,6 +5,7 @@ MINIC=$3
MAXIC=$4 MAXIC=$4
MINOC=$5 MINOC=$5
MAXOC=$6 MAXOC=$6
prefix=$8
meanstatsfilename=$2/mean.html meanstatsfilename=$2/mean.html
@ -37,7 +38,7 @@ echo '<br/>' >> $meanstatsfilename
webpagefilename=$2/index.html webpagefilename=$2/index.html
# echo '<h3>'${WHAT}'</h3>' >> $webpagefilename # echo '<h3>'${WHAT}'</h3>' >> $webpagefilename
echo '<hr/><a href="/btl/'$1'.pdf"><img src="/btl/'$1'.png" alt="'${WHAT}'" /></a><br/>' >> $webpagefilename echo '<hr/><a href="'$prefix$1'.pdf"><img src="'$prefix$1'.png" alt="'${WHAT}'" /></a><br/>' >> $webpagefilename

View File

@ -37,11 +37,11 @@
// min matrix size for matrix matrix product bench // min matrix size for matrix matrix product bench
#define MIN_MM 5 #define MIN_MM 5
// max matrix size for matrix matrix product bench // max matrix size for matrix matrix product bench
#define MAX_MM 2048 #define MAX_MM MAX_MV
// min matrix size for LU bench // min matrix size for LU bench
#define MIN_LU 5 #define MIN_LU 5
// max matrix size for LU bench // max matrix size for LU bench
#define MAX_LU 1024 #define MAX_LU 2048
// max size for tiny vector and matrix // max size for tiny vector and matrix
#define TINY_MV_MAX_SIZE 16 #define TINY_MV_MAX_SIZE 16
// default nb_sample for x86 timer // default nb_sample for x86 timer

View File

@ -169,7 +169,7 @@ class BtlConfig
{ {
public: public:
BtlConfig() BtlConfig()
: overwriteResults(false) : overwriteResults(false), checkResults(true)
{ {
char * _config; char * _config;
_config = getenv ("BTL_CONFIG"); _config = getenv ("BTL_CONFIG");
@ -193,6 +193,10 @@ public:
{ {
Instance.overwriteResults = true; Instance.overwriteResults = true;
} }
else if (config[i].beginsWith("--nocheck"))
{
Instance.checkResults = false;
}
} }
} }
@ -214,6 +218,7 @@ public:
static BtlConfig Instance; static BtlConfig Instance;
bool overwriteResults; bool overwriteResults;
bool checkResults;
protected: protected:
std::vector<BtlString> m_selectedActionNames; std::vector<BtlString> m_selectedActionNames;

View File

@ -65,9 +65,12 @@ public:
time_action = time_action / (double(_nb_calc)); time_action = time_action / (double(_nb_calc));
// check // check
action.initialize(); if (BtlConfig::Instance.checkResults)
action.calculate(); {
action.check_result(); action.initialize();
action.calculate();
action.check_result();
}
return action.nb_op_base()/(time_action*1000000.0); return action.nb_op_base()/(time_action*1000000.0);
} }

View File

@ -132,7 +132,7 @@ static char notrans = 'N';
static char trans = 'T'; static char trans = 'T';
static char nonunit = 'N'; static char nonunit = 'N';
static char lower = 'L'; static char lower = 'L';
static blasint intone = 1; static int intone = 1;
template<> template<>
class C_BLAS_interface<float> : public f77_interface_base<float> class C_BLAS_interface<float> : public f77_interface_base<float>
@ -160,6 +160,14 @@ public :
cblas_ssymv(CblasColMajor,CblasLower,N,1.0,A,N,B,1,0.0,X,1); cblas_ssymv(CblasColMajor,CblasLower,N,1.0,A,N,B,1,0.0,X,1);
#endif #endif
} }
static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
#ifdef PUREBLAS
ssyr2_(&lower,&N,&fone,B,&intone,X,&intone,A,&N);
#else
cblas_ssyr2(CblasColMajor,CblasLower,N,1.0,B,1,X,1,A,N);
#endif
}
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
#ifdef PUREBLAS #ifdef PUREBLAS

View File

@ -41,6 +41,7 @@ int main()
bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_symv<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_symv<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_syr2<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

View File

@ -146,6 +146,15 @@ public :
X[j] += t2; X[j] += t2;
} }
} }
static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
{
for (int j=0; j<N; ++j)
{
for (int i=j; i<N; ++i)
A[j][i] += B[i]*X[j] + B[j]*X[i];
}
}
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
{ {

View File

@ -31,6 +31,7 @@ int main()
bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_symv<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_symv<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_syr2<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_ata_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_aat_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

View File

@ -7,7 +7,7 @@ if (EIGEN2_FOUND)
btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp) btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp)
btl_add_bench(btl_eigen2_matmat main_matmat.cpp) btl_add_bench(btl_eigen2_matmat main_matmat.cpp)
btl_add_bench(btl_eigen2_adv main_adv.cpp) btl_add_bench(btl_eigen2_adv main_adv.cpp)
IF(NOT BTL_NOVEC) IF(NOT BTL_NOVEC)
btl_add_bench(btl_eigen2_novec_linear main_linear.cpp) btl_add_bench(btl_eigen2_novec_linear main_linear.cpp)
btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp) btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp)

View File

@ -18,7 +18,7 @@
#ifndef EIGEN2_INTERFACE_HH #ifndef EIGEN2_INTERFACE_HH
#define EIGEN2_INTERFACE_HH #define EIGEN2_INTERFACE_HH
// #include <cblas.h> // #include <cblas.h>
#include <Eigen/Core> #include <Eigen/Array>
#include <Eigen/Cholesky> #include <Eigen/Cholesky>
#include <Eigen/LU> #include <Eigen/LU>
#include <Eigen/QR> #include <Eigen/QR>
@ -45,7 +45,9 @@ public :
static inline std::string name( void ) static inline std::string name( void )
{ {
#if defined(EIGEN_VECTORIZE_SSE) #if defined(EIGEN_USE_NEW_PRODUCT)
if (SIZE==Dynamic) return "eigen2_newprod"; else return "tiny_eigen2";
#elif defined(EIGEN_VECTORIZE_SSE)
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2"; if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC) #elif defined(EIGEN_VECTORIZE_ALTIVEC)
if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2"; if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2";
@ -114,7 +116,57 @@ public :
} }
static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){ static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){
X = (A.template marked<SelfAdjoint|LowerTriangular>() * B)/*.lazy()*/; //X = (A.template marked<SelfAdjoint|LowerTriangular>() * B)/*.lazy()*/;
ei_product_selfadjoint_vector<real,0,LowerTriangularBit>(N,A.data(),N, B.data(), X.data());
}
template<typename Dest, typename Src> static void triassign(Dest& dst, const Src& src)
{
typedef typename Dest::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
const int PacketSize = sizeof(Packet)/sizeof(Scalar);
int size = dst.cols();
for(int j=0; j<size; j+=1)
{
// const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
Scalar* A0 = dst.data() + j*dst.stride();
int starti = j;
int alignedEnd = starti;
int alignedStart = (starti) + ei_alignmentOffset(&A0[starti], size-starti);
alignedEnd = alignedStart + ((size-alignedStart)/(2*PacketSize))*(PacketSize*2);
// do the non-vectorizable part of the assignment
for (int index = starti; index<alignedStart ; ++index)
{
if(Dest::Flags&RowMajorBit)
dst.copyCoeff(j, index, src);
else
dst.copyCoeff(index, j, src);
}
// do the vectorizable part of the assignment
for (int index = alignedStart; index<alignedEnd; index+=PacketSize)
{
if(Dest::Flags&RowMajorBit)
dst.template copyPacket<Src, Aligned, Unaligned>(j, index, src);
else
dst.template copyPacket<Src, Aligned, Unaligned>(index, j, src);
}
// do the non-vectorizable part of the assignment
for (int index = alignedEnd; index<size; ++index)
{
if(Dest::Flags&RowMajorBit)
dst.copyCoeff(j, index, src);
else
dst.copyCoeff(index, j, src);
}
//dst.col(j).end(N-j) = src.col(j).end(N-j);
}
}
static EIGEN_DONT_INLINE void syr2(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){
// ei_product_selfadjoint_rank2_update<real,0,LowerTriangularBit>(N,A.data(),N, X.data(), 1, Y.data(), 1, -1);
} }
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
@ -126,7 +178,9 @@ public :
} }
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
asm("#begin axpby");
Y = a*X + b*Y; Y = a*X + b*Y;
asm("#end axpby");
} }
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
@ -158,7 +212,10 @@ public :
} }
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
C = Tridiagonalization<gene_matrix>(X).packedMatrix(); typename Tridiagonalization<gene_matrix>::CoeffVectorType aux(N-1);
C = X;
Tridiagonalization<gene_matrix>::_compute(C, aux);
// C = Tridiagonalization<gene_matrix>(X).packedMatrix();
} }
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){ static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){

View File

@ -19,7 +19,6 @@
#include "eigen2_interface.hh" #include "eigen2_interface.hh"
#include "bench.hh" #include "bench.hh"
#include "basic_actions.hh" #include "basic_actions.hh"
#include "action_symv.hh"
BTL_MAIN; BTL_MAIN;
@ -28,6 +27,7 @@ int main()
bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_symv<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_symv<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_syr2<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
return 0; return 0;
} }

View File

@ -38,16 +38,16 @@ public :
typedef typename f77_interface_base<real>::gene_vector gene_vector; typedef typename f77_interface_base<real>::gene_vector gene_vector;
static void free_matrix(gene_matrix & A, int N){ static void free_matrix(gene_matrix & A, int N){
ei_aligned_delete(A); ei_aligned_free(A);
} }
static void free_vector(gene_vector & B){ static void free_vector(gene_vector & B){
ei_aligned_delete(B); ei_aligned_free(B);
} }
static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
int N = A_stl.size(); int N = A_stl.size();
A = ei_aligned_new<real>(N*N); A = (real*)ei_aligned_malloc(N*N*sizeof(real));
for (int j=0;j<N;j++) for (int j=0;j<N;j++)
for (int i=0;i<N;i++) for (int i=0;i<N;i++)
A[i+N*j] = A_stl[j][i]; A[i+N*j] = A_stl[j][i];
@ -55,7 +55,7 @@ public :
static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){ static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
int N = B_stl.size(); int N = B_stl.size();
B = ei_aligned_new<real>(N); B = (real*)ei_aligned_malloc(N*sizeof(real));
for (int i=0;i<N;i++) for (int i=0;i<N;i++)
B[i] = B_stl[i]; B[i] = B_stl[i];
} }
@ -236,6 +236,129 @@ public :
} }
asm("#end matrix_vector_product"); asm("#end matrix_vector_product");
} }
static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
{
// int AN = (N/PacketSize)*PacketSize;
// int ANP = (AN/(2*PacketSize))*2*PacketSize;
// int bound = (N/4)*4;
for (int i=0;i<N;i++)
X[i] = 0;
int bound = std::max(0,N-8) & 0xfffffffE;
for (int j=0;j<bound;j+=2)
{
register real* __restrict__ A0 = A + j*N;
register real* __restrict__ A1 = A + (j+1)*N;
real t0 = B[j];
Packet ptmp0 = ei_pset1(t0);
real t1 = B[j+1];
Packet ptmp1 = ei_pset1(t1);
real t2 = 0;
Packet ptmp2 = ei_pset1(t2);
real t3 = 0;
Packet ptmp3 = ei_pset1(t3);
int starti = j+2;
int alignedEnd = starti;
int alignedStart = (starti) + ei_alignmentOffset(&X[starti], N-starti);
alignedEnd = alignedStart + ((N-alignedStart)/(PacketSize))*(PacketSize);
X[j] += t0 * A0[j];
X[j+1] += t1 * A1[j];
X[j+1] += t0 * A0[j+1];
t2 += A0[j+1] * B[j+1];
// alignedStart = alignedEnd;
for (int i=starti; i<alignedStart; ++i) {
X[i] += t0 * A0[i] + t1 * A1[i];
t2 += A0[i] * B[i];
t3 += A1[i] * B[i];
}
asm("#begin symv");
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize) {
Packet A0i = ei_ploadu(&A0[i]);
Packet A1i = ei_ploadu(&A1[i]);
// Packet A0i1 = ei_ploadu(&A0[i+PacketSize]);
Packet Xi = ei_pload(&X[i]);
Packet Bi = ei_pload/*u*/(&B[i]);
// Packet Xi1 = ei_pload(&X[i+PacketSize]);
// Packet Bi1 = ei_pload/*u*/(&B[i+PacketSize]);
Xi = ei_padd(ei_padd(Xi, ei_pmul(ptmp0, A0i)), ei_pmul(ptmp1, A1i));
ptmp2 = ei_padd(ptmp2, ei_pmul(A0i, Bi));
ptmp3 = ei_padd(ptmp3, ei_pmul(A1i, Bi));
// Xi1 = ei_padd(Xi1, ei_pmul(ptmp1, A0i1));
// ptmp2 = ei_padd(ptmp2, ei_pmul(A0i1, Bi1));
//
ei_pstore(&X[i],Xi);
// ei_pstore(&X[i+PacketSize],Xi1);
// asm(
// "prefetchnta 64(%[A0],%[i],4) \n\t"
// //"movups (%[A0],%[i],4), %%xmm8 \n\t"
// "movsd (%[A0],%[i],4), %%xmm8 \n\t"
// "movhps 8(%[A0],%[i],4), %%xmm8 \n\t"
// // "movups 16(%[A0],%[i],4), %%xmm9 \n\t"
// // "movups 64(%[A0],%[i],4), %%xmm15 \n\t"
// "movaps (%[B], %[i],4), %%xmm12 \n\t"
// // "movaps 16(%[B], %[i],4), %%xmm13 \n\t"
// "movaps (%[X], %[i],4), %%xmm10 \n\t"
// // "movaps 16(%[X], %[i],4), %%xmm11 \n\t"
//
// "mulps %%xmm8, %%xmm12 \n\t"
// // "mulps %%xmm9, %%xmm13 \n\t"
//
// "mulps %[ptmp1], %%xmm8 \n\t"
// "addps %%xmm12, %[ptmp2] \n\t"
// "addps %%xmm8, %%xmm10 \n\t"
//
//
//
//
// // "mulps %[ptmp1], %%xmm9 \n\t"
//
// // "addps %%xmm9, %%xmm11 \n\t"
// // "addps %%xmm13, %[ptmp2] \n\t"
//
// "movaps %%xmm10, (%[X],%[i],4) \n\t"
// // "movaps %%xmm11, 16(%[X],%[i],4) \n\t"
// :
// : [X] "r" (X), [i] "r" (i), [A0] "r" (A0),
// [B] "r" (B),
// [ptmp1] "x" (ptmp1),
// [ptmp2] "x" (ptmp2)
// : "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm15");
}
asm("#end symv");
for (int i=alignedEnd; i<N; i++) {
X[i] += t0 * A0[i] + t1 * A1[i];
t2 += A0[i] * B[i];
t3 += A1[i] * B[i];
}
X[j] += t2 + ei_predux(ptmp2);
X[j+1] += t3 + ei_predux(ptmp3);
}
for (int j=bound;j<N;j++)
{
register real* __restrict__ A0 = A + j*N;
real t1 = B[j];
real t2 = 0;
X[j] += t1 * A0[j];
for (int i=j+1; i<N; i+=PacketSize) {
X[i] += t1 * A0[i];
t2 += A0[i] * B[i];
}
X[j] += t2;
}
}
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N) // static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
// { // {

View File

@ -26,6 +26,7 @@
#include "action_axpy.hh" #include "action_axpy.hh"
#include "action_ata_product.hh" #include "action_ata_product.hh"
#include "action_aat_product.hh" #include "action_aat_product.hh"
#include "basic_actions.hh"
//#include "action_lu_solve.hh" //#include "action_lu_solve.hh"
// #include "timers/mixed_perf_analyzer.hh" // #include "timers/mixed_perf_analyzer.hh"
@ -39,6 +40,7 @@ int main()
// bench<Action_matrix_matrix_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); // bench<Action_matrix_matrix_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_aat_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); // bench<Action_aat_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); // bench<Action_ata_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_symv<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_axpy<hand_vec_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench<Action_axpy<hand_vec_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);