* updated benchmark files according to recent renamings

* various improvements in BTL including trisolver and cholesky bench
This commit is contained in:
Gael Guennebaud 2008-07-27 11:39:47 +00:00
parent e9e5261664
commit 93115619c2
48 changed files with 1212 additions and 279 deletions

View File

@ -29,6 +29,7 @@
#include <sys/time.h> #include <sys/time.h>
#include <unistd.h> #include <unistd.h>
#include <cstdlib> #include <cstdlib>
#include <numeric>
namespace Eigen namespace Eigen
{ {
@ -39,11 +40,11 @@ class BenchTimer
{ {
public: public:
BenchTimer() : m_best(1e12) {} BenchTimer() { reset(); }
~BenchTimer() {} ~BenchTimer() {}
inline void reset(void) {m_best = 1e12;} inline void reset(void) {m_best = 1e6;}
inline void start(void) {m_start = getTime();} inline void start(void) {m_start = getTime();}
inline void stop(void) inline void stop(void)
{ {

View File

@ -99,9 +99,9 @@ int main(int argc, char *argv[])
Scalar alpha, beta; Scalar alpha, beta;
MyMatrix ma(M,K), mb(K,N), mc(M,N); MyMatrix ma(M,K), mb(K,N), mc(M,N);
ma = MyMatrix::random(M,K); ma = MyMatrix::Random(M,K);
mb = MyMatrix::random(K,N); mb = MyMatrix::Random(K,N);
mc = MyMatrix::random(M,N); mc = MyMatrix::Random(M,N);
Eigen::BenchTimer timer; Eigen::BenchTimer timer;
@ -132,9 +132,9 @@ int main(int argc, char *argv[])
} }
// clear // clear
ma = MyMatrix::random(M,K); ma = MyMatrix::Random(M,K);
mb = MyMatrix::random(K,N); mb = MyMatrix::Random(K,N);
mc = MyMatrix::random(M,N); mc = MyMatrix::Random(M,N);
// eigen // eigen
// if (!(std::string(argv[1])=="auto")) // if (!(std::string(argv[1])=="auto"))
@ -153,9 +153,9 @@ int main(int argc, char *argv[])
} }
// clear // clear
ma = MyMatrix::random(M,K); ma = MyMatrix::Random(M,K);
mb = MyMatrix::random(K,N); mb = MyMatrix::Random(K,N);
mc = MyMatrix::random(M,N); mc = MyMatrix::Random(M,N);
// eigen normal // eigen normal
if (!(std::string(argv[1])=="auto")) if (!(std::string(argv[1])=="auto"))
@ -193,11 +193,11 @@ void bench_eigengemm_normal(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb
void check_product(int M, int N, int K) void check_product(int M, int N, int K)
{ {
MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N); MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
ma = MyMatrix::random(M,K); ma = MyMatrix::Random(M,K);
mb = MyMatrix::random(K,N); mb = MyMatrix::Random(K,N);
maT = ma.transpose(); maT = ma.transpose();
mbT = mb.transpose(); mbT = mb.transpose();
mc = MyMatrix::random(M,N); mc = MyMatrix::Random(M,N);
MyMatrix::Scalar eps = 1e-4; MyMatrix::Scalar eps = 1e-4;

View File

@ -40,7 +40,7 @@ __attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType; typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
MatrixType a = MatrixType::random(rows,cols); MatrixType a = MatrixType::Random(rows,cols);
SquareMatrixType covMat = a * a.adjoint(); SquareMatrixType covMat = a * a.adjoint();
BenchTimer timerSa, timerStd; BenchTimer timerSa, timerStd;

View File

@ -18,7 +18,7 @@ USING_PART_OF_NAMESPACE_EIGEN
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::ones(); Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones();
Matrix<SCALAR,MATSIZE,MATSIZE> m; Matrix<SCALAR,MATSIZE,MATSIZE> m;
for(int i = 0; i < MATSIZE; i++) for(int i = 0; i < MATSIZE; i++)
for(int j = 0; j < MATSIZE; j++) for(int j = 0; j < MATSIZE; j++)
@ -28,7 +28,7 @@ int main(int argc, char *argv[])
asm("#begin"); asm("#begin");
for(int a = 0; a < REPEAT; a++) for(int a = 0; a < REPEAT; a++)
{ {
m = Matrix<SCALAR,MATSIZE,MATSIZE>::ones() + 0.00005 * (m + (m*m)); m = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones() + 0.00005 * (m + (m*m));
} }
asm("#end"); asm("#end");
cout << m << endl; cout << m << endl;

View File

@ -26,7 +26,7 @@ int main(int argc, char *argv[])
c = Eigen::ei_random<int>(0,10); c = Eigen::ei_random<int>(0,10);
nr = Eigen::ei_random<int>(50,80); nr = Eigen::ei_random<int>(50,80);
nc = Eigen::ei_random<int>(50,80); nc = Eigen::ei_random<int>(50,80);
m.block(r,c,nr,nc) += Mat::ones(nr,nc); m.block(r,c,nr,nc) += Mat::Ones(nr,nc);
m.block(r,c,nr,nc) *= SCALAR(10); m.block(r,c,nr,nc) *= SCALAR(10);
m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10); m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10);
m.block(r,c,nr,nc) /= SCALAR(10); m.block(r,c,nr,nc) /= SCALAR(10);

View File

@ -18,7 +18,7 @@ USING_PART_OF_NAMESPACE_EIGEN
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
MATTYPE I = MATTYPE::ones(MATSIZE,MATSIZE); MATTYPE I = MATTYPE::Ones(MATSIZE,MATSIZE);
MATTYPE m(MATSIZE,MATSIZE); MATTYPE m(MATSIZE,MATSIZE);
for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++) for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
{ {

View File

@ -19,7 +19,7 @@ USING_PART_OF_NAMESPACE_EIGEN
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
VECTYPE I = VECTYPE::ones(VECSIZE); VECTYPE I = VECTYPE::Ones(VECSIZE);
VECTYPE m(VECSIZE,1); VECTYPE m(VECSIZE,1);
for(int i = 0; i < VECSIZE; i++) for(int i = 0; i < VECSIZE; i++)
{ {
@ -27,7 +27,7 @@ int main(int argc, char *argv[])
} }
for(int a = 0; a < REPEAT; a++) for(int a = 0; a < REPEAT; a++)
{ {
m = VECTYPE::ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4); m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4);
} }
cout << m[0] << endl; cout << m[0] << endl;
return 0; return 0;

View File

@ -26,6 +26,11 @@ include_directories(
${PROJECT_SOURCE_DIR}/generic_bench/utils ${PROJECT_SOURCE_DIR}/generic_bench/utils
${PROJECT_SOURCE_DIR}/libs/STL) ${PROJECT_SOURCE_DIR}/libs/STL)
# find_package(MKL)
# if (MKL_FOUND)
# add_definitions(-DHAVE_MKL)
# set(DEFAULT_LIBRARIES ${MKL_LIBRARIES})
# endif (MKL_FOUND)
MACRO(BTL_ADD_BENCH targetname) MACRO(BTL_ADD_BENCH targetname)
@ -49,6 +54,7 @@ MACRO(BTL_ADD_BENCH targetname)
IF(BUILD_${targetname}) IF(BUILD_${targetname})
ADD_EXECUTABLE(${targetname} ${_sources}) ADD_EXECUTABLE(${targetname} ${_sources})
ADD_TEST(${targetname} "${targetname}") ADD_TEST(${targetname} "${targetname}")
target_link_libraries(${targetname} ${DEFAULT_LIBRARIES})
ENDIF(BUILD_${targetname}) ENDIF(BUILD_${targetname})
ENDMACRO(BTL_ADD_BENCH) ENDMACRO(BTL_ADD_BENCH)

View File

@ -29,7 +29,18 @@ BTL uses cmake / ctest:
$ ctest -V $ ctest -V
You can also run a single bench, e.g.: ctest -V -R eigen You can run the benchmarks only on libraries matching a given regular expression:
ctest -V -R <regexp>
For instance:
ctest -V -R eigen2
You can also select a given set of actions defining the environment variable BTL_CONFIG this way:
BTL_CONFIG="-a action1{:action2}*" ctest -V
An exemple:
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2
Finally, if bench results already exist (the bench*.dat files) then they merges by keeping the best for each matrix size. If you want to overwrite the previous ones you can simply add the "--overwrite" option:
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
4 : Analyze the result. different data files (.dat) are produced in each libs directories. 4 : Analyze the result. different data files (.dat) are produced in each libs directories.
If gnuplot is available, choose a directory name in the data directory to store the results and type If gnuplot is available, choose a directory name in the data directory to store the results and type

View File

@ -0,0 +1,124 @@
//=====================================================
// File : action_axpby.hh
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_AXPBY
#define ACTION_AXPBY
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
template<class Interface>
class Action_axpby {
public :
// Ctor
Action_axpby( int size ):_size(size),_alpha(0.5),_beta(0.95)
{
MESSAGE("Action_axpby Ctor");
// STL vector initialization
init_vector<pseudo_random>(X_stl,_size);
init_vector<pseudo_random>(Y_stl,_size);
init_vector<null_function>(resu_stl,_size);
// generic matrix and vector initialization
Interface::vector_from_stl(X_ref,X_stl);
Interface::vector_from_stl(Y_ref,Y_stl);
Interface::vector_from_stl(X,X_stl);
Interface::vector_from_stl(Y,Y_stl);
}
// invalidate copy ctor
Action_axpby( const Action_axpby & )
{
INFOS("illegal call to Action_axpby Copy Ctor");
exit(1);
}
// Dtor
~Action_axpby( void ){
MESSAGE("Action_axpby Dtor");
// deallocation
Interface::free_vector(X_ref);
Interface::free_vector(Y_ref);
Interface::free_vector(X);
Interface::free_vector(Y);
}
// action name
static inline std::string name( void )
{
return "axpby_"+Interface::name();
}
double nb_op_base( void ){
return 3.0*_size;
}
inline void initialize( void ){
Interface::copy_vector(X_ref,X,_size);
Interface::copy_vector(Y_ref,Y,_size);
}
inline void calculate( void ) {
Interface::axpby(_alpha,X,_beta,Y,_size);
}
void check_result( void ){
// calculation check
Interface::vector_to_stl(Y,resu_stl);
STL_interface<typename Interface::real_type>::axpby(_alpha,X_stl,_beta,Y_stl,_size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
if (error>1.e-6){
INFOS("WRONG CALCULATION...residual=" << error);
exit(2);
}
}
private :
typename Interface::stl_vector X_stl;
typename Interface::stl_vector Y_stl;
typename Interface::stl_vector resu_stl;
typename Interface::gene_vector X_ref;
typename Interface::gene_vector Y_ref;
typename Interface::gene_vector X;
typename Interface::gene_vector Y;
typename Interface::real_type _alpha;
typename Interface::real_type _beta;
int _size;
};
#endif

View File

@ -0,0 +1,132 @@
//=====================================================
// File : action_cholesky.hh
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_CHOLESKY
#define ACTION_CHOLESKY
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
template<class Interface>
class Action_cholesky {
public :
// Ctor
Action_cholesky( int size ):_size(size)
{
MESSAGE("Action_cholesky Ctor");
// STL vector initialization
typename Interface::stl_matrix tmp;
init_matrix<pseudo_random>(tmp,_size);
init_matrix<null_function>(X_stl,_size);
STL_interface<typename Interface::real_type>::ata_product(tmp,X_stl,_size);
init_matrix<null_function>(C_stl,_size);
init_matrix<null_function>(resu_stl,_size);
// generic matrix and vector initialization
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(C,C_stl);
_cost = 0;
for (int j=0; j<_size; ++j)
{
int r = std::max(_size - j -1,0);
_cost += 2*(r*j+r+j);
}
}
// invalidate copy ctor
Action_cholesky( const Action_cholesky & )
{
INFOS("illegal call to Action_cholesky Copy Ctor");
exit(1);
}
// Dtor
~Action_cholesky( void ){
MESSAGE("Action_cholesky Dtor");
// deallocation
Interface::free_matrix(X_ref,_size);
Interface::free_matrix(X,_size);
Interface::free_matrix(C,_size);
}
// action name
static inline std::string name( void )
{
return "cholesky_"+Interface::name();
}
double nb_op_base( void ){
return _cost;
}
inline void initialize( void ){
Interface::copy_matrix(X_ref,X,_size);
}
inline void calculate( void ) {
Interface::cholesky(X,C,_size);
}
void check_result( void ){
// calculation check
Interface::matrix_to_stl(C,resu_stl);
// STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
}
private :
typename Interface::stl_matrix X_stl;
typename Interface::stl_matrix C_stl;
typename Interface::stl_matrix resu_stl;
typename Interface::gene_matrix X_ref;
typename Interface::gene_matrix X;
typename Interface::gene_matrix C;
int _size;
int _cost;
};
#endif

View File

@ -49,11 +49,10 @@ public :
// generic matrix and vector initialization // generic matrix and vector initialization
Interface::matrix_from_stl(A_ref,A_stl); Interface::matrix_from_stl(A_ref,A_stl);
Interface::vector_from_stl(B_ref,B_stl);
Interface::vector_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(A,A_stl); Interface::matrix_from_stl(A,A_stl);
Interface::vector_from_stl(B_ref,B_stl);
Interface::vector_from_stl(B,B_stl); Interface::vector_from_stl(B,B_stl);
Interface::vector_from_stl(X_ref,X_stl);
Interface::vector_from_stl(X,X_stl); Interface::vector_from_stl(X,X_stl);
} }

View File

@ -0,0 +1,136 @@
//=====================================================
// File : action_trisolve.hh
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_TRISOLVE
#define ACTION_TRISOLVE
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
template<class Interface>
class Action_trisolve {
public :
// Ctor
Action_trisolve( int size ):_size(size)
{
MESSAGE("Action_trisolve Ctor");
// STL vector initialization
init_matrix<pseudo_random>(L_stl,_size);
init_vector<pseudo_random>(B_stl,_size);
init_vector<null_function>(X_stl,_size);
for (int j=0; j<_size; ++j)
{
for (int i=0; i<j; ++i)
L_stl[j][i] = 0;
L_stl[j][j] += 3;
}
init_vector<null_function>(resu_stl,_size);
// generic matrix and vector initialization
Interface::matrix_from_stl(L,L_stl);
Interface::vector_from_stl(X,X_stl);
Interface::vector_from_stl(B,B_stl);
_cost = 0;
for (int j=0; j<_size; ++j)
{
_cost += 2*j + 1;
}
}
// invalidate copy ctor
Action_trisolve( const Action_trisolve & )
{
INFOS("illegal call to Action_trisolve Copy Ctor");
exit(1);
}
// Dtor
~Action_trisolve( void ){
MESSAGE("Action_trisolve Dtor");
// deallocation
Interface::free_matrix(L,_size);
Interface::free_vector(B);
Interface::free_vector(X);
}
// action name
static inline std::string name( void )
{
return "trisolve_"+Interface::name();
}
double nb_op_base( void ){
return _cost;
}
inline void initialize( void ){
//Interface::copy_vector(X_ref,X,_size);
}
inline void calculate( void ) {
Interface::trisolve_lower(L,B,X,_size);
}
void check_result( void ){
// calculation check
Interface::vector_to_stl(X,resu_stl);
STL_interface<typename Interface::real_type>::trisolve_lower(L_stl,B_stl,X_stl,_size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
if (error>1.e-4){
INFOS("WRONG CALCULATION...residual=" << error);
exit(2);
} //else INFOS("CALCULATION OK...residual=" << error);
}
private :
typename Interface::stl_matrix L_stl;
typename Interface::stl_vector X_stl;
typename Interface::stl_vector B_stl;
typename Interface::stl_vector resu_stl;
typename Interface::gene_matrix L;
typename Interface::gene_vector X;
typename Interface::gene_vector B;
int _size;
int _cost;
};
#endif

View File

@ -0,0 +1,15 @@
#include "action_axpy.hh"
#include "action_axpby.hh"
#include "action_matrix_vector_product.hh"
#include "action_atv_product.hh"
#include "action_matrix_matrix_product.hh"
#include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_trisolve.hh"
// #include "action_lu_solve.hh"

View File

@ -0,0 +1,35 @@
# include(FindLibraryWithDebug)
if (ATLAS_INCLUDES AND ATLAS_LIBRARIES)
set(ATLAS_FIND_QUIETLY TRUE)
endif (ATLAS_INCLUDES AND ATLAS_LIBRARIES)
find_path(ATLAS_INCLUDES
NAMES
cblas.h
PATHS
$ENV{ATLASDIR}/include
${INCLUDE_INSTALL_DIR}
)
find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
set(ATLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS} ${ATLAS_LAPACK} ${ATLAS_F77BLAS})
endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ATLAS DEFAULT_MSG
ATLAS_INCLUDES ATLAS_LIBRARIES)
mark_as_advanced(ATLAS_INCLUDES ATLAS_LIBRARIES)

View File

@ -1,7 +1,7 @@
ADD_CUSTOM_TARGET(copy_scripts) ADD_CUSTOM_TARGET(copy_scripts)
SET(script_files go_mean aat.hh ata.hh axpy.hh order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh ) SET(script_files go_mean aat.hh ata.hh axpy.hh axpby.hh cholesky.hh trisolve.hh order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh )
FOREACH(script_file ${script_files}) FOREACH(script_file ${script_files})
ADD_CUSTOM_COMMAND( ADD_CUSTOM_COMMAND(

3
bench/btl/data/axpby.hh Normal file
View File

@ -0,0 +1,3 @@
set title "{/*1.5 Y = alpha * X + beta * Y}" 0.000000,0.000000
set xlabel "vector size" 0.000000,0.000000
set xrange [1:1000000]

View File

@ -0,0 +1,3 @@
set title "{/*1.5 Cholesky decomposition}" 0.000000,0.000000
set xlabel "matrix size" 0.000000,0.000000
set xrange [6:1000]

View File

@ -17,11 +17,13 @@ echo '<ul>'\
'</p>' >> $webpagefilename '</p>' >> $webpagefilename
source mk_mean_script.sh axpy $1 11 2500 100000 250000 > $1/axpy.html source mk_mean_script.sh axpy $1 11 2500 100000 250000 > $1/axpy.html
source mk_mean_script.sh matrix_vector $1 11 50 300 500 > $1/matrix_vector.html source mk_mean_script.sh matrix_vector $1 11 50 300 1000 > $1/matrix_vector.html
source mk_mean_script.sh atv $1 11 50 300 500 > $1/atv.html source mk_mean_script.sh atv $1 11 50 300 1000 > $1/atv.html
# source mk_mean_script.sh matrix_matrix $1 11 100 300 500 > $1/matrix_matrix.html source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 > $1/matrix_matrix.html
# source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html
# source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html
source mk_mean_script.sh trisolve $1 11 100 300 1000 > $1/trisolve.html
source mk_mean_script.sh cholesky $1 11 100 300 1000 > $1/cholesky.html
## compile the web page ## ## compile the web page ##

View File

@ -23,11 +23,11 @@
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include "bench_parameter.hh" #include "bench_parameter.hh"
#include "utils/xy_file.hh"
#include <set> #include <set>
using namespace std; using namespace std;
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max); double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max);
class Lib_Mean{ class Lib_Mean{
@ -152,29 +152,6 @@ int main( int argc , char *argv[] )
} }
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
ifstream input_file (filename.c_str(),ios::in) ;
if (!input_file){
INFOS("!!! Error opening "<<filename);
exit(0);
}
int nb_point=0;
int size=0;
double mflops=0;
while (input_file >> size >> mflops ){
nb_point++;
tab_sizes.push_back(size);
tab_mflops.push_back(mflops);
}
SCRUTE(nb_point);
input_file.close();
}
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){ double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){
int size=tab_sizes.size(); int size=tab_sizes.size();

View File

@ -0,0 +1,3 @@
set title "{/*1.5 triangular solver (X = inv(L) * X)}" 0.000000,0.000000
set xlabel "matrix-vector size" 0.000000,0.000000
set xrange [6:1000]

View File

@ -25,14 +25,16 @@
#include <iostream> #include <iostream>
#include "utilities.h" #include "utilities.h"
#include "size_lin_log.hh" #include "size_lin_log.hh"
#include "dump_file_x_y.hh" #include "xy_file.hh"
#include <vector> #include <vector>
#include <string> #include <string>
#include "timers/portable_perf_analyzer.hh" #include "timers/portable_perf_analyzer.hh"
// #include "timers/mixed_perf_analyzer.hh" // #include "timers/mixed_perf_analyzer.hh"
// #include "timers/x86_perf_analyzer.hh" // #include "timers/x86_perf_analyzer.hh"
// #include "timers/STL_perf_analyzer.hh" // #include "timers/STL_perf_analyzer.hh"
#ifdef HAVE_MKL
extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int);
#endif
using namespace std; using namespace std;
template <template<class> class Perf_Analyzer, class Action> template <template<class> class Perf_Analyzer, class Action>
@ -41,8 +43,6 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
if (BtlConfig::skipAction(Action::name())) if (BtlConfig::skipAction(Action::name()))
return; return;
BTL_DISABLE_SSE_EXCEPTIONS();
string filename="bench_"+Action::name()+".dat"; string filename="bench_"+Action::name()+".dat";
INFOS("starting " <<filename); INFOS("starting " <<filename);
@ -64,14 +64,71 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
{ {
//INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")"); //INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush; std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush;
BTL_DISABLE_SSE_EXCEPTIONS();
#ifdef HAVE_MKL
{
float dummy;
cblas_saxpy(1,0,&dummy,1,&dummy,1);
}
#endif
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]); tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl; std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
} }
// std::cout << "\n";
if (!BtlConfig::Instance.overwriteResults)
{
std::vector<int> oldSizes;
std::vector<double> oldFlops;
if (read_xy_file(filename, oldSizes, oldFlops, true))
{
// merge the two data
std::vector<int> newSizes;
std::vector<double> newFlops;
int i=0;
int j=0;
while (i<tab_sizes.size() && j<oldSizes.size())
{
if (tab_sizes[i] == oldSizes[j])
{
newSizes.push_back(tab_sizes[i]);
newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
++i;
++j;
}
else if (tab_sizes[i] < oldSizes[j])
{
newSizes.push_back(tab_sizes[i]);
newFlops.push_back(tab_mflops[i]);
++i;
}
else
{
newSizes.push_back(oldSizes[j]);
newFlops.push_back(oldFlops[j]);
++j;
}
}
while (i<tab_sizes.size())
{
newSizes.push_back(tab_sizes[i]);
newFlops.push_back(tab_mflops[i]);
++i;
}
while (j<oldSizes.size())
{
newSizes.push_back(oldSizes[j]);
newFlops.push_back(oldFlops[j]);
++j;
}
tab_mflops = newFlops;
tab_sizes = newSizes;
}
}
// dump the result in a file : // dump the result in a file :
dump_xy_file(tab_sizes,tab_mflops,filename);
dump_file_x_y(tab_sizes,tab_mflops,filename);
} }

View File

@ -23,7 +23,7 @@
// minimal time for each measurement // minimal time for each measurement
#define REAL_TYPE float #define REAL_TYPE float
// minimal time for each measurement // minimal time for each measurement
#define MIN_TIME 0.5 #define MIN_TIME 1.0
// nb of point on bench curves // nb of point on bench curves
#define NB_POINT 100 #define NB_POINT 100
// min vector size for axpy bench // min vector size for axpy bench

View File

@ -48,7 +48,7 @@
: : [aux] "m" (aux)); \ : : [aux] "m" (aux)); \
} }
#else #else
#define DISABLE_SSE_EXCEPTIONS() #define BTL_DISABLE_SSE_EXCEPTIONS()
#endif #endif
/** Enhanced std::string /** Enhanced std::string
@ -163,7 +163,7 @@ class BtlConfig
{ {
public: public:
BtlConfig() BtlConfig()
: m_runSingleAction(false) : overwriteResults(false)
{ {
char * _config; char * _config;
_config = getenv ("BTL_CONFIG"); _config = getenv ("BTL_CONFIG");
@ -179,33 +179,38 @@ public:
std::cerr << "error processing option: " << config[i] << "\n"; std::cerr << "error processing option: " << config[i] << "\n";
exit(2); exit(2);
} }
Instance.m_runSingleAction = true; Instance.m_selectedActionNames = config[i+1].split(":");
Instance.m_singleActionName = config[i+1];
i += 1; i += 1;
} }
else if (config[i].beginsWith("--overwrite"))
{
Instance.overwriteResults = true;
}
} }
} }
BTL_DISABLE_SSE_EXCEPTIONS(); BTL_DISABLE_SSE_EXCEPTIONS();
} }
BTL_DONT_INLINE static bool skipAction(const std::string& name) BTL_DONT_INLINE static bool skipAction(const std::string& _name)
{ {
if (Instance.m_runSingleAction) if (Instance.m_selectedActionNames.empty())
{
return !BtlString(name).contains(Instance.m_singleActionName);
}
return false; return false;
BtlString name(_name);
for (int i=0; i<Instance.m_selectedActionNames.size(); ++i)
if (name.contains(Instance.m_selectedActionNames[i]))
return false;
return true;
} }
protected:
bool m_runSingleAction;
BtlString m_singleActionName;
static BtlConfig Instance; static BtlConfig Instance;
bool overwriteResults;
protected:
std::vector<BtlString> m_selectedActionNames;
}; };
#define BTL_MAIN \ #define BTL_MAIN \

View File

@ -24,7 +24,7 @@
#include "bench_parameter.hh" #include "bench_parameter.hh"
#include <iostream> #include <iostream>
#include "utilities.h" #include "utilities.h"
#include "dump_file_x_y.hh" #include "xy_file.hh"
#include "static/static_size_generator.hh" #include "static/static_size_generator.hh"
#include "timers/portable_perf_analyzer.hh" #include "timers/portable_perf_analyzer.hh"
#include "timers/mixed_perf_analyzer.hh" #include "timers/mixed_perf_analyzer.hh"

View File

@ -38,16 +38,16 @@ public:
MESSAGE("Portable_Perf_Analyzer Dtor"); MESSAGE("Portable_Perf_Analyzer Dtor");
}; };
BTL_DONT_INLINE double eval_mflops(int size) BTL_DONT_INLINE double eval_mflops(int size)
{ {
Action action(size); Action action(size);
double time_action = 0;
double time_action = time_calculate(action); action.initialize();
time_action = time_calculate(action);
while (time_action < MIN_TIME) while (time_action < MIN_TIME)
{ {
//Action action(size);
_nb_calc *= 2; _nb_calc *= 2;
action.initialize(); action.initialize();
time_action = time_calculate(action); time_action = time_calculate(action);
@ -56,8 +56,10 @@ public:
// optimize // optimize
for (int i=1; i<NB_TRIES; ++i) for (int i=1; i<NB_TRIES; ++i)
{ {
action.initialize(); Action _action(size);
time_action = std::min(time_action, time_calculate(action)); std::cout << " " << _action.nb_op_base()*_nb_calc/(time_action*1e6) << " ";
_action.initialize();
time_action = std::min(time_action, time_calculate(_action));
} }
time_action = time_action / (double(_nb_calc)); time_action = time_action / (double(_nb_calc));
@ -66,13 +68,13 @@ public:
action.initialize(); action.initialize();
action.calculate(); action.calculate();
action.check_result(); action.check_result();
return action.nb_op_base()/(time_action*1000000.0); return action.nb_op_base()/(time_action*1000000.0);
} }
BTL_DONT_INLINE double time_calculate(Action & action) BTL_DONT_INLINE double time_calculate(Action & action)
{ {
// time measurement // time measurement
action.calculate();
_chronos.start(); _chronos.start();
for (int ii=0;ii<_nb_calc;ii++) for (int ii=0;ii<_nb_calc;ii++)
{ {

View File

@ -39,6 +39,38 @@
// A timer object measures CPU time. // A timer object measures CPU time.
// class Portable_Timer
// {
// public:
//
// Portable_Timer( void )
// {
// }
//
//
// void start() { m_val = getTime(); }
//
// void stop() { m_val = getTime() - m_val; }
//
// double elapsed() { return m_val; }
//
// double user_time() { return elapsed(); }
//
//
// private:
//
// static inline double getTime(void)
// {
// struct timeval tv;
// struct timezone tz;
// gettimeofday(&tv, &tz);
// return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
// }
//
// double m_val;
//
// }; // Portable_Timer
class Portable_Timer class Portable_Timer
{ {
public: public:
@ -46,42 +78,42 @@ class Portable_Timer
Portable_Timer( void ):_utime_sec_start(-1), Portable_Timer( void ):_utime_sec_start(-1),
_utime_usec_start(-1), _utime_usec_start(-1),
_utime_sec_stop(-1), _utime_sec_stop(-1),
_utime_usec_stop(-1) _utime_usec_stop(-1)/*,
m_prev_cs(-1)*/
{ {
} }
void start() void start()
{ {
int status=getrusage(RUSAGE_SELF, &resourcesUsage) ; int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
// _start_time = std::clock();
_start_time = std::clock();
_utime_sec_start = resourcesUsage.ru_utime.tv_sec ; _utime_sec_start = resourcesUsage.ru_utime.tv_sec ;
_utime_usec_start = resourcesUsage.ru_utime.tv_usec ; _utime_usec_start = resourcesUsage.ru_utime.tv_usec ;
// m_prev_cs = resourcesUsage.ru_nivcsw;
} }
void stop() void stop()
{ {
int status=getrusage(RUSAGE_SELF, &resourcesUsage) ; int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
// _stop_time = std::clock();
_stop_time = std::clock();
_utime_sec_stop = resourcesUsage.ru_utime.tv_sec ; _utime_sec_stop = resourcesUsage.ru_utime.tv_sec ;
_utime_usec_stop = resourcesUsage.ru_utime.tv_usec ; _utime_usec_stop = resourcesUsage.ru_utime.tv_usec ;
// m_prev_cs = resourcesUsage.ru_nivcsw - m_prev_cs;
// std::cerr << resourcesUsage.ru_nvcsw << " + " << resourcesUsage.ru_nivcsw << "\n";
} }
double elapsed() double elapsed()
{ {
return double(_stop_time - _start_time) / CLOCKS_PER_SEC; return user_time();//double(_stop_time - _start_time) / CLOCKS_PER_SEC;
} }
double user_time() double user_time()
{ {
// std::cout << m_prev_cs << "\n";
long tot_utime_sec=_utime_sec_stop-_utime_sec_start; long tot_utime_sec=_utime_sec_stop-_utime_sec_start;
long tot_utime_usec=_utime_usec_stop-_utime_usec_start; long tot_utime_usec=_utime_usec_stop-_utime_usec_start;
return double(tot_utime_sec)+ double(tot_utime_usec)/double(USEC_IN_SEC) ; return double(tot_utime_sec)+ double(tot_utime_usec)/double(USEC_IN_SEC) ;
@ -98,6 +130,8 @@ private:
long _utime_sec_stop ; long _utime_sec_stop ;
long _utime_usec_stop ; long _utime_usec_stop ;
// long m_prev_cs;
std::clock_t _start_time; std::clock_t _start_time;
std::clock_t _stop_time; std::clock_t _stop_time;

View File

@ -17,10 +17,41 @@
// along with this program; if not, write to the Free Software // along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
// //
#ifndef DUMP_FILE_X_Y_HH #ifndef XY_FILE_HH
#define DUMP_FILE_X_Y_HH #define XY_FILE_HH
#include <fstream> #include <fstream>
#include <iostream>
#include <string> #include <string>
#include <vector>
using namespace std;
bool read_xy_file(const std::string & filename, std::vector<int> & tab_sizes,
std::vector<double> & tab_mflops, bool quiet = false)
{
std::ifstream input_file (filename.c_str(),std::ios::in);
if (!input_file){
if (!quiet) {
INFOS("!!! Error opening "<<filename);
}
return false;
}
int nb_point=0;
int size=0;
double mflops=0;
while (input_file >> size >> mflops ){
nb_point++;
tab_sizes.push_back(size);
tab_mflops.push_back(mflops);
}
SCRUTE(nb_point);
input_file.close();
return true;
}
// The Vector class must satisfy the following part of STL vector concept : // The Vector class must satisfy the following part of STL vector concept :
// resize() method // resize() method
@ -30,17 +61,14 @@
using namespace std; using namespace std;
template<class Vector_A, class Vector_B> template<class Vector_A, class Vector_B>
void dump_file_x_y(const Vector_A & X, const Vector_B & Y, const std::string & filename){ void dump_xy_file(const Vector_A & X, const Vector_B & Y, const std::string & filename){
ofstream outfile (filename.c_str(),ios::out) ; ofstream outfile (filename.c_str(),ios::out) ;
int size=X.size(); int size=X.size();
for (int i=0;i<size;i++){ for (int i=0;i<size;i++)
outfile << X[i] << " " << Y[i] << endl; outfile << X[i] << " " << Y[i] << endl;
}
outfile.close(); outfile.close();
} }

View File

@ -1,13 +1,13 @@
find_package(CBLAS) find_package(ATLAS)
if (CBLAS_FOUND) if (ATLAS_FOUND)
include_directories(${CBLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77) include_directories(${ATLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77)
btl_add_bench(btl_cblas main.cpp) btl_add_bench(btl_atlas main.cpp)
if(BUILD_btl_cblas) if(BUILD_btl_atlas)
target_link_libraries(btl_cblas ${CBLAS_LIBRARIES}) target_link_libraries(btl_atlas ${ATLAS_LIBRARIES})
set_target_properties(btl_cblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS") set_target_properties(btl_atlas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1")
endif(BUILD_btl_cblas) endif(BUILD_btl_atlas)
endif (CBLAS_FOUND) endif (ATLAS_FOUND)
find_package(MKL) find_package(MKL)
if (MKL_FOUND) if (MKL_FOUND)
@ -15,6 +15,6 @@ if (MKL_FOUND)
btl_add_bench(btl_mkl main.cpp) btl_add_bench(btl_mkl main.cpp)
if(BUILD_btl_mkl) if(BUILD_btl_mkl)
target_link_libraries(btl_mkl ${MKL_LIBRARIES}) target_link_libraries(btl_mkl ${MKL_LIBRARIES})
set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL") set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1")
endif(BUILD_btl_mkl) endif(BUILD_btl_mkl)
endif (MKL_FOUND) endif (MKL_FOUND)

View File

@ -25,6 +25,11 @@
extern "C" extern "C"
{ {
#include "cblas.h" #include "cblas.h"
#ifdef HAS_LAPACK
// Cholesky Factorization
void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info);
void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
#endif
} }
#define MAKE_STRING2(S) #S #define MAKE_STRING2(S) #S
@ -53,31 +58,31 @@ public :
cblas_dgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1); cblas_dgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
} }
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
{
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
} }
static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
{
cblas_dgemm(CblasColMajor,CblasTrans,CblasTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); cblas_dgemm(CblasColMajor,CblasTrans,CblasTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
} }
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N) static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
{
cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
} }
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N) static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
{
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
} }
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N) static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
{
cblas_daxpy(N,coef,X,1,Y,1); cblas_daxpy(N,coef,X,1,Y,1);
} }
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
cblas_dscal(N,b,Y,1);
cblas_daxpy(N,a,X,1,Y,1);
}
}; };
template<> template<>
@ -90,41 +95,62 @@ public :
return MAKE_STRING(CBLASNAME); return MAKE_STRING(CBLASNAME);
} }
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
{
cblas_sgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1); cblas_sgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
} }
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
{
cblas_sgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1); cblas_sgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
} }
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
{
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
} }
static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
{
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N); cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
} }
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N) static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
{
cblas_sgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); cblas_sgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
} }
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N) static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
{
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N); cblas_sgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
} }
static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N) static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N){
{
cblas_saxpy(N,coef,X,1,Y,1); cblas_saxpy(N,coef,X,1,Y,1);
} }
static inline void axpby(float a, const gene_vector & X, float b, gene_vector & Y, int N){
cblas_sscal(N,b,Y,1);
cblas_saxpy(N,a,X,1,Y,1);
}
#ifdef HAS_LAPACK
static inline void cholesky(const gene_vector & X, gene_vector & C, int N){
cblas_scopy(N*N, X, 1, C, 1);
char uplo = 'L';
int info = 0;
spotrf_(&uplo, &N, C, &N, &info);
// float tmp[N];
// for (int j = 1; j < N; ++j)
// {
// int endSize = N-j-1;
// if (endSize>0) {
//cblas_sgemv(CblasColMajor, CblasNoTrans, N-j-1, j, ATL_rnone, A+j+1, lda, A+j, lda, ATL_rone, Ac+j+1, 1);
// cblas_sgemv(CblasColMajor, CblasNoTrans,endSize,j, 1.0, &(C[j+1]),N, &(C[j]),N, 0.0, &(C[j+1+N*j]),1);
// }
// }
}
#endif
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
cblas_scopy(N, B, 1, X, 1);
cblas_strsv(CblasColMajor, CblasLower, CblasNoTrans, CblasNonUnit, N, L, N, X, 1);
}
}; };

View File

@ -20,13 +20,11 @@
#include "utilities.h" #include "utilities.h"
#include "C_BLAS_interface.hh" #include "C_BLAS_interface.hh"
#include "bench.hh" #include "bench.hh"
#include "action_matrix_vector_product.hh" #include "basic_actions.hh"
#include "action_matrix_matrix_product.hh"
#include "action_atv_product.hh" #ifdef HAS_LAPACK
#include "action_axpy.hh" #include "action_cholesky.hh"
#include "action_lu_solve.hh" #endif
#include "action_ata_product.hh"
#include "action_aat_product.hh"
BTL_MAIN; BTL_MAIN;
@ -34,17 +32,21 @@ int main()
{ {
bench<Action_axpy<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench<Action_axpy<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_aat_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
#ifdef HAS_LAPACK
bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
#endif
//bench<Action_lu_solve<C_BLAS_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT); //bench<Action_lu_solve<C_BLAS_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
return 0; return 0;

View File

@ -146,6 +146,22 @@ public :
Y[i]+=coef*X[i]; Y[i]+=coef*X[i];
} }
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
for (int i=0;i<N;i++)
Y[i] = a*X[i] + b*Y[i];
}
static inline void trisolve_lower(const gene_matrix & L, const gene_vector & B, gene_vector & X, int N){
copy_vector(B,X,N);
for(int i=0; i<N; ++i)
{
X[i] /= L[i][i];
real tmp = X[i];
for (int j=i+1; j<N; ++j)
X[j] -= tmp * L[i][j];
}
}
static inline real norm_diff(const stl_vector & A, const stl_vector & B) static inline real norm_diff(const stl_vector & A, const stl_vector & B)
{ {
int N=A.size(); int N=A.size();

View File

@ -20,19 +20,14 @@
#include "utilities.h" #include "utilities.h"
#include "STL_interface.hh" #include "STL_interface.hh"
#include "bench.hh" #include "bench.hh"
#include "action_matrix_vector_product.hh" #include "basic_actions.hh"
#include "action_matrix_matrix_product.hh"
#include "action_axpy.hh"
#include "action_lu_solve.hh"
#include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_atv_product.hh"
BTL_MAIN; BTL_MAIN;
int main() int main()
{ {
bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

View File

@ -3,13 +3,28 @@ find_package(Eigen2)
if (EIGEN2_FOUND) if (EIGEN2_FOUND)
include_directories(${EIGEN2_INCLUDE_DIR}) include_directories(${EIGEN2_INCLUDE_DIR})
btl_add_bench(btl_eigen2 main.cpp) btl_add_bench(btl_eigen2_linear main_linear.cpp)
btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp)
btl_add_bench(btl_eigen2_matmat main_matmat.cpp)
btl_add_bench(btl_eigen2_adv main_adv.cpp)
IF(NOT BTL_NOVEC) IF(NOT BTL_NOVEC)
btl_add_bench(btl_eigen2_novec main.cpp) btl_add_bench(btl_eigen2_novec_linear main_linear.cpp)
if(BUILD_btl_eigen2_novec) btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp)
set_target_properties(btl_eigen2_novec PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE") btl_add_bench(btl_eigen2_novec_matmat main_matmat.cpp)
endif(BUILD_btl_eigen2_novec) btl_add_bench(btl_eigen2_novec_adv main_adv.cpp)
if(BUILD_btl_eigen2_novec_linear)
set_target_properties(btl_eigen2_novec_linear PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
endif(BUILD_btl_eigen2_novec_linear)
if(BUILD_btl_eigen2_novec_vecmat)
set_target_properties(btl_eigen2_novec_vecmat PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
endif(BUILD_btl_eigen2_novec_vecmat)
if(BUILD_btl_eigen2_novec_matmat)
set_target_properties(btl_eigen2_novec_matmat PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
endif(BUILD_btl_eigen2_novec_matmat)
if(BUILD_btl_eigen2_novec_adv)
set_target_properties(btl_eigen2_novec_adv PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
endif(BUILD_btl_eigen2_novec_adv)
ENDIF(NOT BTL_NOVEC) ENDIF(NOT BTL_NOVEC)
btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF) btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF)

View File

@ -19,6 +19,7 @@
#define EIGEN2_INTERFACE_HH #define EIGEN2_INTERFACE_HH
#include <Eigen/Core> #include <Eigen/Core>
#include <Eigen/Cholesky>
#include <vector> #include <vector>
#include "btl.hh" #include "btl.hh"
@ -107,17 +108,21 @@ public :
} }
static inline void matrix_vector_product(const gene_matrix & __restrict__ A, const gene_vector & __restrict__ B, gene_vector & __restrict__ X, int N){ static inline void matrix_vector_product(const gene_matrix & __restrict__ A, const gene_vector & __restrict__ B, gene_vector & __restrict__ X, int N){
X = (A*B).lazy(); X = (A*B)/*.lazy()*/;
} }
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
X = (A.transpose()*B).lazy(); X = (A.transpose()*B)/*.lazy()*/;
} }
static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){ static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
Y += coef * X; Y += coef * X;
} }
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
Y = a*X + b*Y;
}
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
cible = source; cible = source;
} }
@ -126,6 +131,16 @@ public :
cible = source; cible = source;
} }
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int N){
X = L.template marked<Lower>().inverseProduct(B);
}
static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
// C = X;
// Cholesky<gene_matrix>::computeInPlace(C);
C = X.cholesky().matrixL();
}
}; };
#endif #endif

View File

@ -0,0 +1,34 @@
//=====================================================
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include "eigen2_interface.hh"
#include "bench.hh"
#include "action_trisolve.hh"
#include "action_cholesky.hh"
BTL_MAIN;
int main()
{
bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
return 0;
}

View File

@ -0,0 +1,34 @@
//=====================================================
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include "eigen2_interface.hh"
#include "bench.hh"
#include "basic_actions.hh"
BTL_MAIN;
int main()
{
bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
return 0;
}

View File

@ -0,0 +1,34 @@
//=====================================================
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include "eigen2_interface.hh"
#include "bench.hh"
#include "basic_actions.hh"
BTL_MAIN;
int main()
{
bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
return 0;
}

View File

@ -18,29 +18,15 @@
#include "utilities.h" #include "utilities.h"
#include "eigen2_interface.hh" #include "eigen2_interface.hh"
#include "bench.hh" #include "bench.hh"
#include "action_matrix_vector_product.hh" #include "basic_actions.hh"
#include "action_matrix_matrix_product.hh"
#include "action_axpy.hh"
#include "action_lu_solve.hh"
#include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_atv_product.hh"
BTL_MAIN; BTL_MAIN;
int main() int main()
{ {
bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
// bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
//bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
return 0; return 0;
} }

View File

@ -106,6 +106,10 @@ public :
gmm::add(gmm::scaled(X,coef), Y); gmm::add(gmm::scaled(X,coef), Y);
} }
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
gmm::add(gmm::scaled(X,a), gmm::scaled(Y,b), Y);
}
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
gmm::copy(source,cible); gmm::copy(source,cible);
} }
@ -114,6 +118,12 @@ public :
gmm::copy(source,cible); gmm::copy(source,cible);
} }
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
gmm::copy(B,X);
gmm::lower_tri_solve(L, X, false);
}
}; };
#endif #endif

View File

@ -18,13 +18,7 @@
#include "utilities.h" #include "utilities.h"
#include "gmm_interface.hh" #include "gmm_interface.hh"
#include "bench.hh" #include "bench.hh"
#include "action_matrix_vector_product.hh" #include "basic_actions.hh"
#include "action_matrix_matrix_product.hh"
#include "action_axpy.hh"
#include "action_lu_solve.hh"
#include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_atv_product.hh"
BTL_MAIN; BTL_MAIN;
@ -32,12 +26,16 @@ int main()
{ {
bench<Action_axpy<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench<Action_axpy<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_vector_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_matrix_matrix_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_ata_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_aat_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
//bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT); //bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
return 0; return 0;

View File

@ -72,92 +72,148 @@ static inline void matrix_vector_product(const gene_matrix & A, const gene_vecto
{ {
asm("#begin matrix_vector_product"); asm("#begin matrix_vector_product");
int AN = (N/PacketSize)*PacketSize; int AN = (N/PacketSize)*PacketSize;
int ANP = (AN/(4*PacketSize))*4*PacketSize; int ANP = (AN/(2*PacketSize))*2*PacketSize;
int bound = (N/4)*4; int bound = (N/4)*4;
for (int i=0;i<N;i++) for (int i=0;i<N;i++)
X[i] = 0; X[i] = 0;
for (int i=0;i<bound;i+=4) for (int i=0;i<bound;i+=4)
{ {
real tmp0 = B[i]; register real* __restrict__ A0 = A + i*N;
Packet ptmp0 = ei_pset1(tmp0); register real* __restrict__ A1 = A + (i+1)*N;
real tmp1 = B[i+1]; register real* __restrict__ A2 = A + (i+2)*N;
Packet ptmp1 = ei_pset1(tmp1); register real* __restrict__ A3 = A + (i+3)*N;
real tmp2 = B[i+2];
Packet ptmp2 = ei_pset1(tmp2); Packet ptmp0 = ei_pset1(B[i]);
real tmp3 = B[i+3]; Packet ptmp1 = ei_pset1(B[i+1]);
Packet ptmp3 = ei_pset1(tmp3); Packet ptmp2 = ei_pset1(B[i+2]);
int iN0 = i*N; Packet ptmp3 = ei_pset1(B[i+3]);
int iN1 = (i+1)*N; // register Packet ptmp0, ptmp1, ptmp2, ptmp3;
int iN2 = (i+2)*N; // asm(
int iN3 = (i+3)*N; //
// "movss (%[B],%[j],4), %[ptmp0] \n\t"
// "shufps $0,%[ptmp0],%[ptmp0] \n\t"
// "movss 4(%[B],%[j],4), %[ptmp1] \n\t"
// "shufps $0,%[ptmp1],%[ptmp1] \n\t"
// "movss 8(%[B],%[j],4), %[ptmp2] \n\t"
// "shufps $0,%[ptmp2],%[ptmp2] \n\t"
// "movss 12(%[B],%[j],4), %[ptmp3] \n\t"
// "shufps $0,%[ptmp3],%[ptmp3] \n\t"
// : [ptmp0] "=x" (ptmp0),
// [ptmp1] "=x" (ptmp1),
// [ptmp2] "=x" (ptmp2),
// [ptmp3] "=x" (ptmp3)
// : [B] "r" (B),
// [j] "r" (size_t(i))
// : );
if (AN>0) if (AN>0)
{ {
// int aligned0 = (iN0 % PacketSize); // for (size_t j = 0;j<ANP;j+=8)
int aligned1 = (iN1 % PacketSize); // {
// asm(
//
// "movaps (%[A0],%[j],4), %%xmm8 \n\t"
// "movaps 16(%[A0],%[j],4), %%xmm12 \n\t"
// "movups (%[A3],%[j],4), %%xmm11 \n\t"
// "movups 16(%[A3],%[j],4), %%xmm15 \n\t"
// "movups (%[A2],%[j],4), %%xmm10 \n\t"
// "movups 16(%[A2],%[j],4), %%xmm14 \n\t"
// "movups (%[A1],%[j],4), %%xmm9 \n\t"
// "movups 16(%[A1],%[j],4), %%xmm13 \n\t"
//
// "mulps %[ptmp0], %%xmm8 \n\t"
// "addps (%[res0],%[j],4), %%xmm8 \n\t"
// "mulps %[ptmp3], %%xmm11 \n\t"
// "addps %%xmm11, %%xmm8 \n\t"
// "mulps %[ptmp2], %%xmm10 \n\t"
// "addps %%xmm10, %%xmm8 \n\t"
// "mulps %[ptmp1], %%xmm9 \n\t"
// "addps %%xmm9, %%xmm8 \n\t"
// "movaps %%xmm8, (%[res0],%[j],4) \n\t"
//
// "mulps %[ptmp0], %%xmm12 \n\t"
// "addps 16(%[res0],%[j],4), %%xmm12 \n\t"
// "mulps %[ptmp3], %%xmm15 \n\t"
// "addps %%xmm15, %%xmm12 \n\t"
// "mulps %[ptmp2], %%xmm14 \n\t"
// "addps %%xmm14, %%xmm12 \n\t"
// "mulps %[ptmp1], %%xmm13 \n\t"
// "addps %%xmm13, %%xmm12 \n\t"
// "movaps %%xmm12, 16(%[res0],%[j],4) \n\t"
// :
// : [res0] "r" (X), [j] "r" (j),[A0] "r" (A0),
// [A1] "r" (A1),
// [A2] "r" (A2),
// [A3] "r" (A3),
// [ptmp0] "x" (ptmp0),
// [ptmp1] "x" (ptmp1),
// [ptmp2] "x" (ptmp2),
// [ptmp3] "x" (ptmp3)
// : "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "%r14");
// }
register Packet A00;
register Packet A01;
register Packet A02;
register Packet A03;
register Packet A10;
register Packet A11;
register Packet A12;
register Packet A13;
for (int j = 0;j<ANP;j+=2*PacketSize)
{
// A00 = ei_pload(&A0[j]);
// A01 = ei_ploadu(&A1[j]);
// A02 = ei_ploadu(&A2[j]);
// A03 = ei_ploadu(&A3[j]);
// A10 = ei_pload(&A0[j+PacketSize]);
// A11 = ei_ploadu(&A1[j+PacketSize]);
// A12 = ei_ploadu(&A2[j+PacketSize]);
// A13 = ei_ploadu(&A3[j+PacketSize]);
//
// A00 = ei_pmul(ptmp0, A00);
// A01 = ei_pmul(ptmp1, A01);
// A02 = ei_pmul(ptmp2, A02);
// A03 = ei_pmul(ptmp3, A03);
// A10 = ei_pmul(ptmp0, A10);
// A11 = ei_pmul(ptmp1, A11);
// A12 = ei_pmul(ptmp2, A12);
// A13 = ei_pmul(ptmp3, A13);
//
// A00 = ei_padd(A00,A01);
// A02 = ei_padd(A02,A03);
// A00 = ei_padd(A00,ei_pload(&X[j]));
// A00 = ei_padd(A00,A02);
// ei_pstore(&X[j],A00);
//
// A10 = ei_padd(A10,A11);
// A12 = ei_padd(A12,A13);
// A10 = ei_padd(A10,ei_pload(&X[j+PacketSize]));
// A10 = ei_padd(A10,A12);
// ei_pstore(&X[j+PacketSize],A10);
if (aligned1==0)
{
for (int j = 0;j<AN;j+=PacketSize)
{
ei_pstore(&X[j], ei_pstore(&X[j],
ei_padd(ei_pload(&X[j]), ei_padd(ei_pload(&X[j]),
ei_padd( ei_padd(
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_pload(&A[j+iN1]))), ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j])),ei_pmul(ptmp1,ei_ploadu(&A1[j]))),
ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_pload(&A[j+iN3]))) ))); ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j])),ei_pmul(ptmp3,ei_ploadu(&A3[j]))) )));
}
}
else if (aligned1==2)
{
for (int j = 0;j<AN;j+=PacketSize)
{
ei_pstore(&X[j],
ei_padd(ei_pload(&X[j]),
ei_padd(
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
}
}
else
{
for (int j = 0;j<ANP;j+=4*PacketSize)
{
ei_pstore(&X[j],
ei_padd(ei_pload(&X[j]),
ei_padd(
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
ei_pstore(&X[j+PacketSize], ei_pstore(&X[j+PacketSize],
ei_padd(ei_pload(&X[j+PacketSize]), ei_padd(ei_pload(&X[j+PacketSize]),
ei_padd( ei_padd(
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+PacketSize+iN1]))), ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j+PacketSize])),ei_pmul(ptmp1,ei_ploadu(&A1[j+PacketSize]))),
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+PacketSize+iN3]))) ))); ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j+PacketSize])),ei_pmul(ptmp3,ei_ploadu(&A3[j+PacketSize]))) )));
ei_pstore(&X[j+2*PacketSize],
ei_padd(ei_pload(&X[j+2*PacketSize]),
ei_padd(
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+2*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+2*PacketSize+iN1]))),
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+2*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+2*PacketSize+iN3]))) )));
ei_pstore(&X[j+3*PacketSize],
ei_padd(ei_pload(&X[j+3*PacketSize]),
ei_padd(
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+3*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+3*PacketSize+iN1]))),
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+3*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+3*PacketSize+iN3]))) )));
} }
for (int j = ANP;j<AN;j+=PacketSize) for (int j = ANP;j<AN;j+=PacketSize)
ei_pstore(&X[j], ei_pstore(&X[j],
ei_padd(ei_pload(&X[j]), ei_padd(ei_pload(&X[j]),
ei_padd( ei_padd(
ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))), ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j])),ei_pmul(ptmp1,ei_ploadu(&A1[j]))),
ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) ))); ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j])),ei_pmul(ptmp3,ei_ploadu(&A3[j]))) )));
}
} }
// process remaining scalars // process remaining scalars
for (int j=AN;j<N;j++) for (int j=AN;j<N;j++)
X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1]; X[j] += ei_pfirst(ptmp0) * A0[j] + ei_pfirst(ptmp1) * A1[j] + ei_pfirst(ptmp2) * A2[j] + ei_pfirst(ptmp3) * A3[j];
} }
for (int i=bound;i<N;i++) for (int i=bound;i<N;i++)
{ {
@ -181,6 +237,119 @@ static inline void matrix_vector_product(const gene_matrix & A, const gene_vecto
asm("#end matrix_vector_product"); asm("#end matrix_vector_product");
} }
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
// {
// asm("#begin matrix_vector_product");
// int AN = (N/PacketSize)*PacketSize;
// int ANP = (AN/(2*PacketSize))*2*PacketSize;
// int bound = (N/4)*4;
// for (int i=0;i<N;i++)
// X[i] = 0;
//
// for (int i=0;i<bound;i+=4)
// {
// real tmp0 = B[i];
// Packet ptmp0 = ei_pset1(tmp0);
// real tmp1 = B[i+1];
// Packet ptmp1 = ei_pset1(tmp1);
// real tmp2 = B[i+2];
// Packet ptmp2 = ei_pset1(tmp2);
// real tmp3 = B[i+3];
// Packet ptmp3 = ei_pset1(tmp3);
// int iN0 = i*N;
// int iN1 = (i+1)*N;
// int iN2 = (i+2)*N;
// int iN3 = (i+3)*N;
// if (AN>0)
// {
// // int aligned0 = (iN0 % PacketSize);
// int aligned1 = (iN1 % PacketSize);
//
// if (aligned1==0)
// {
// for (int j = 0;j<AN;j+=PacketSize)
// {
// ei_pstore(&X[j],
// ei_padd(ei_pload(&X[j]),
// ei_padd(
// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_pload(&A[j+iN1]))),
// ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_pload(&A[j+iN3]))) )));
// }
// }
// else if (aligned1==2)
// {
// for (int j = 0;j<AN;j+=PacketSize)
// {
// ei_pstore(&X[j],
// ei_padd(ei_pload(&X[j]),
// ei_padd(
// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
// ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
// }
// }
// else
// {
// for (int j = 0;j<ANP;j+=2*PacketSize)
// {
// ei_pstore(&X[j],
// ei_padd(ei_pload(&X[j]),
// ei_padd(
// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
//
// ei_pstore(&X[j+PacketSize],
// ei_padd(ei_pload(&X[j+PacketSize]),
// ei_padd(
// ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+PacketSize+iN1]))),
// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+PacketSize+iN3]))) )));
//
// // ei_pstore(&X[j+2*PacketSize],
// // ei_padd(ei_pload(&X[j+2*PacketSize]),
// // ei_padd(
// // ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+2*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+2*PacketSize+iN1]))),
// // ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+2*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+2*PacketSize+iN3]))) )));
// //
// // ei_pstore(&X[j+3*PacketSize],
// // ei_padd(ei_pload(&X[j+3*PacketSize]),
// // ei_padd(
// // ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+3*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+3*PacketSize+iN1]))),
// // ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+3*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+3*PacketSize+iN3]))) )));
//
// }
// for (int j = ANP;j<AN;j+=PacketSize)
// ei_pstore(&X[j],
// ei_padd(ei_pload(&X[j]),
// ei_padd(
// ei_padd(ei_pmul(ptmp0,ei_ploadu(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
// ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
// }
// }
// // process remaining scalars
// for (int j=AN;j<N;j++)
// X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1] + tmp2 * A[j+iN2] + tmp3 * A[j+iN3];
// }
// for (int i=bound;i<N;i++)
// {
// real tmp0 = B[i];
// Packet ptmp0 = ei_pset1(tmp0);
// int iN0 = i*N;
// if (AN>0)
// {
// bool aligned0 = (iN0 % PacketSize) == 0;
// if (aligned0)
// for (int j = 0;j<AN;j+=PacketSize)
// ei_pstore(&X[j], ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pload(&X[j])));
// else
// for (int j = 0;j<AN;j+=PacketSize)
// ei_pstore(&X[j], ei_padd(ei_pmul(ptmp0,ei_ploadu(&A[j+iN0])),ei_pload(&X[j])));
// }
// // process remaining scalars
// for (int j=AN;j<N;j++)
// X[j] += tmp0 * A[j+iN0];
// }
// asm("#end matrix_vector_product");
// }
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N) // static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
// { // {
// asm("#begin matrix_vector_product"); // asm("#begin matrix_vector_product");

View File

@ -18,13 +18,8 @@
#include "utilities.h" #include "utilities.h"
#include "mtl4_interface.hh" #include "mtl4_interface.hh"
#include "bench.hh" #include "bench.hh"
#include "action_matrix_vector_product.hh" #include "basic_actions.hh"
#include "action_matrix_matrix_product.hh" #include "action_cholesky.hh"
#include "action_axpy.hh"
#include "action_lu_solve.hh"
#include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_atv_product.hh"
BTL_MAIN; BTL_MAIN;
@ -32,12 +27,17 @@ int main()
{ {
bench<Action_axpy<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench<Action_axpy<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_vector_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_matrix_matrix_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); // bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); // bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_cholesky<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
return 0; return 0;
} }

View File

@ -19,6 +19,7 @@
#define MTL4_INTERFACE_HH #define MTL4_INTERFACE_HH
#include <boost/numeric/mtl/mtl.hpp> #include <boost/numeric/mtl/mtl.hpp>
#include <boost/numeric/mtl/operation/cholesky.hpp>
#include <vector> #include <vector>
using namespace mtl; using namespace mtl;
@ -81,6 +82,9 @@ public :
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
X = (A*B); X = (A*B);
// morton_dense<double, doppled_64_row_mask> C(N,N);
// C = B;
// X = (A*C);
} }
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
@ -88,11 +92,11 @@ public :
} }
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){ static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
// X = (trans(A)*A); X = (trans(A)*A);
} }
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){ static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
// X = (A*trans(A)); X = (A*trans(A));
} }
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
@ -107,6 +111,19 @@ public :
Y += coef * X; Y += coef * X;
} }
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
Y = a*X + b*Y;
}
static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
C = X;
recursive_cholesky(C);
}
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
X = lower_trisolve(L, B);
}
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
cible = source; cible = source;
} }

View File

@ -20,18 +20,15 @@
#include "utilities.h" #include "utilities.h"
#include "ublas_interface.hh" #include "ublas_interface.hh"
#include "bench.hh" #include "bench.hh"
#include "action_matrix_vector_product.hh" #include "basic_actions.hh"
#include "action_matrix_matrix_product.hh" #include "basic_actions.hh"
#include "action_axpy.hh"
#include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_atv_product.hh"
BTL_MAIN; BTL_MAIN;
int main() int main()
{ {
bench<Action_axpy<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench<Action_axpy<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_matrix_vector_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT); bench<Action_atv_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
@ -40,6 +37,8 @@ int main()
bench<Action_ata_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_ata_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT); bench<Action_aat_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
return 0; return 0;
} }

View File

@ -23,7 +23,9 @@
#include <boost/numeric/ublas/vector.hpp> #include <boost/numeric/ublas/vector.hpp>
#include <boost/numeric/ublas/matrix.hpp> #include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/io.hpp> #include <boost/numeric/ublas/io.hpp>
#include <boost/numeric/ublas/triangular.hpp>
using namespace boost::numeric;
template <class real> template <class real>
class ublas_interface{ class ublas_interface{
@ -116,6 +118,10 @@ public :
Y.plus_assign(coef*X); Y.plus_assign(coef*X);
} }
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
Y = a*X + b*Y;
}
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
// X = prod(trans(A),A); // X = prod(trans(A),A);
X.assign(prod(trans(A),A)); X.assign(prod(trans(A),A));
@ -126,6 +132,10 @@ public :
X.assign(prod(A,trans(A))); X.assign(prod(A,trans(A)));
} }
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
X = solve(L, B, ublas::lower_tag ());
}
}; };
#endif #endif

View File

@ -60,8 +60,8 @@ int main(int argc, char *argv[])
BenchTimer timer; BenchTimer timer;
#if 1 #if 1
EigenSparseTriMatrix sm1(rows,cols); EigenSparseTriMatrix sm1(rows,cols);
VectorXf b = VectorXf::random(cols); VectorXf b = VectorXf::Random(cols);
VectorXf x = VectorXf::random(cols); VectorXf x = VectorXf::Random(cols);
bool densedone = false; bool densedone = false;
@ -171,9 +171,9 @@ int main(int argc, char *argv[])
{ {
timer.reset(); timer.reset();
for (int _j=0; _j<10; ++_j) { for (int _j=0; _j<10; ++_j) {
Matrix4f m = Matrix4f::random(); Matrix4f m = Matrix4f::Random();
Vector4f b = Vector4f::random(); Vector4f b = Vector4f::Random();
Vector4f x = Vector4f::random(); Vector4f x = Vector4f::Random();
timer.start(); timer.start();
for (int _k=0; _k<1000000; ++_k) { for (int _k=0; _k<1000000; ++_k) {
b = m.inverseProduct(b); b = m.inverseProduct(b);
@ -186,9 +186,9 @@ int main(int argc, char *argv[])
{ {
timer.reset(); timer.reset();
for (int _j=0; _j<10; ++_j) { for (int _j=0; _j<10; ++_j) {
Matrix4f m = Matrix4f::random(); Matrix4f m = Matrix4f::Random();
Vector4f b = Vector4f::random(); Vector4f b = Vector4f::Random();
Vector4f x = Vector4f::random(); Vector4f x = Vector4f::Random();
timer.start(); timer.start();
for (int _k=0; _k<1000000; ++_k) { for (int _k=0; _k<1000000; ++_k) {
m.inverseProductInPlace(x); m.inverseProductInPlace(x);