mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
BTL: rm stupid backends
This commit is contained in:
parent
fe595e91ae
commit
e35b1ef3f3
@ -90,17 +90,13 @@ endmacro(btl_add_target_property)
|
||||
ENABLE_TESTING()
|
||||
|
||||
add_subdirectory(libs/eigen3)
|
||||
# add_subdirectory(libs/hand_vec)
|
||||
add_subdirectory(libs/C_BLAS)
|
||||
add_subdirectory(libs/ublas)
|
||||
add_subdirectory(libs/gmm)
|
||||
add_subdirectory(libs/mtl4)
|
||||
add_subdirectory(libs/ublas)
|
||||
add_subdirectory(libs/blitz)
|
||||
add_subdirectory(libs/tvmet)
|
||||
add_subdirectory(libs/C_BLAS)
|
||||
add_subdirectory(libs/f77)
|
||||
add_subdirectory(libs/C)
|
||||
add_subdirectory(libs/STL)
|
||||
add_subdirectory(libs/STL_algo)
|
||||
|
||||
add_subdirectory(data)
|
||||
|
||||
|
@ -136,13 +136,13 @@ public :
|
||||
|
||||
Action_tridiagonalization( int size ):_size(size)
|
||||
{
|
||||
MESSAGE("Action_hessenberg Ctor");
|
||||
MESSAGE("Action_tridiagonalization Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
typename Interface::stl_matrix tmp;
|
||||
init_matrix<pseudo_random>(tmp,_size);
|
||||
init_matrix<null_function>(X_stl,_size);
|
||||
STL_interface<typename Interface::real_type>::ata_product(tmp,X_stl,_size);
|
||||
STL_interface<typename Interface::real_type>::aat_product(tmp,X_stl,_size);
|
||||
|
||||
init_matrix<null_function>(C_stl,_size);
|
||||
init_matrix<null_function>(resu_stl,_size);
|
||||
@ -155,9 +155,9 @@ public :
|
||||
_cost = 0;
|
||||
for (int j=0; j<_size-2; ++j)
|
||||
{
|
||||
int r = std::max(0,_size-j-1);
|
||||
int b = std::max(0,_size-j-2);
|
||||
_cost += 6 + 3*b + r*r*8;
|
||||
double r = std::max(0,_size-j-1);
|
||||
double b = std::max(0,_size-j-2);
|
||||
_cost += 6. + 3.*b + r*r*8.;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,3 +0,0 @@
|
||||
include_directories(${PROJECT_SOURCE_DIR}/libs/f77)
|
||||
btl_add_bench(btl_C main.cpp OFF)
|
||||
# set_target_properties(btl_C PROPERTIES COMPILE_FLAGS "-fpeel-loops")
|
@ -1,117 +0,0 @@
|
||||
//=====================================================
|
||||
// File : C_interface.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:23 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef C_INTERFACE_HH
|
||||
#define C_INTERFACE_HH
|
||||
|
||||
#include "f77_interface.hh"
|
||||
|
||||
template<class real>
|
||||
class C_interface : public f77_interface_base<real> {
|
||||
|
||||
public :
|
||||
|
||||
typedef typename f77_interface_base<real>::gene_matrix gene_matrix;
|
||||
typedef typename f77_interface_base<real>::gene_vector gene_vector;
|
||||
|
||||
static inline std::string name() { return "C"; }
|
||||
|
||||
static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
// for (int i=0;i<N;i++)
|
||||
// {
|
||||
// real somme = 0.0;
|
||||
// for (int j=0;j<N;j++)
|
||||
// somme += A[j*N+i] * B[j];
|
||||
// X[i] = somme;
|
||||
// }
|
||||
for (int i=0;i<N;i++)
|
||||
X[i] = 0;
|
||||
for (int i=0;i<N;i++)
|
||||
{
|
||||
real tmp = B[i];
|
||||
int iN = i*N;
|
||||
for (int j=0;j<N;j++)
|
||||
X[j] += tmp * A[j+iN];
|
||||
}
|
||||
}
|
||||
|
||||
static inline void atv_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
for (int i=0;i<N;i++)
|
||||
{
|
||||
int iN = i*N;
|
||||
real somme = 0.0;
|
||||
for (int j=0;j<N;j++)
|
||||
somme += A[iN+j] * B[j];
|
||||
X[i] = somme;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
|
||||
{
|
||||
real somme;
|
||||
for (int i=0;i<N;i++){
|
||||
for (int j=0;j<N;j++){
|
||||
somme=0.0;
|
||||
for (int k=0;k<N;k++){
|
||||
somme += A[i+k*N] * B[k+j*N];
|
||||
}
|
||||
X[i+j*N] = somme;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
|
||||
real somme;
|
||||
for (int i=0;i<N;i++){
|
||||
for (int j=0;j<N;j++){
|
||||
somme=0.0;
|
||||
for (int k=0;k<N;k++){
|
||||
somme+=A[k+i*N]*A[k+j*N];
|
||||
}
|
||||
X[i+j*N]=somme;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||
real somme;
|
||||
for (int i=0;i<N;i++){
|
||||
for (int j=0;j<N;j++){
|
||||
somme=0.0;
|
||||
for (int k=0;k<N;k++){
|
||||
somme+=A[i+k*N]*A[j+k*N];
|
||||
}
|
||||
X[i+j*N] = somme;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||
for (int i=0;i<N;i++)
|
||||
Y[i]+=coef*X[i];
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@ -1,48 +0,0 @@
|
||||
//=====================================================
|
||||
// File : main.cpp
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:23 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#include "utilities.h"
|
||||
#include "bench.hh"
|
||||
#include "C_interface.hh"
|
||||
#include "action_matrix_vector_product.hh"
|
||||
#include "action_atv_product.hh"
|
||||
#include "action_matrix_matrix_product.hh"
|
||||
#include "action_axpy.hh"
|
||||
#include "action_ata_product.hh"
|
||||
#include "action_aat_product.hh"
|
||||
//#include "action_lu_solve.hh"
|
||||
#include "timers/mixed_perf_analyzer.hh"
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
bench<Action_matrix_vector_product<C_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||
bench<Action_atv_product<C_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||
bench<Action_matrix_matrix_product<C_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_aat_product<C_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_ata_product<C_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_axpy<C_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1,2 +0,0 @@
|
||||
|
||||
btl_add_bench(btl_STL_algo main.cpp OFF)
|
@ -1,138 +0,0 @@
|
||||
//=====================================================
|
||||
// File : STL_algo_interface.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:24 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef STL_ALGO_INTERFACE_HH
|
||||
#define STL_ALGO_INTERFACE_HH
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include "utilities.h"
|
||||
|
||||
|
||||
template<class real>
|
||||
class STL_algo_interface{
|
||||
|
||||
public :
|
||||
|
||||
typedef real real_type ;
|
||||
|
||||
typedef std::vector<real> stl_vector;
|
||||
typedef std::vector<stl_vector > stl_matrix;
|
||||
|
||||
typedef stl_matrix gene_matrix;
|
||||
|
||||
typedef stl_vector gene_vector;
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "STL_algo";
|
||||
}
|
||||
|
||||
static void free_matrix(gene_matrix & A, int N){}
|
||||
|
||||
static void free_vector(gene_vector & B){}
|
||||
|
||||
static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
|
||||
A=A_stl ;
|
||||
}
|
||||
|
||||
static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
|
||||
B=B_stl ;
|
||||
}
|
||||
|
||||
static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
|
||||
B_stl=B ;
|
||||
}
|
||||
|
||||
static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
|
||||
A_stl=A ;
|
||||
}
|
||||
|
||||
static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
|
||||
for (int i=0;i<N;i++)
|
||||
cible[i]=source[i];
|
||||
}
|
||||
|
||||
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N)
|
||||
{
|
||||
for (int i=0;i<N;i++){
|
||||
for (int j=0;j<N;j++){
|
||||
cible[i][j]=source[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class somme {
|
||||
public:
|
||||
|
||||
somme(real coef):_coef(coef){};
|
||||
|
||||
real operator()(const real & val1, const real & val2)
|
||||
{
|
||||
return _coef * val1 + val2;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
real _coef;
|
||||
|
||||
};
|
||||
|
||||
|
||||
class vector_generator {
|
||||
public:
|
||||
|
||||
vector_generator(const gene_matrix & a_matrix, const gene_vector & a_vector):
|
||||
_matrice(a_matrix),
|
||||
_vecteur(a_vector),
|
||||
_index(0)
|
||||
{};
|
||||
real operator()( void )
|
||||
{
|
||||
|
||||
const gene_vector & ai=_matrice[_index];
|
||||
int N=ai.size();
|
||||
|
||||
_index++;
|
||||
|
||||
return std::inner_product(&ai[0],&ai[N],&_vecteur[0],0.0);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
int _index;
|
||||
const gene_matrix & _matrice;
|
||||
const gene_vector & _vecteur;
|
||||
|
||||
};
|
||||
|
||||
static inline void atv_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
std::generate(&X[0],&X[N],vector_generator(A,B));
|
||||
}
|
||||
|
||||
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
|
||||
{
|
||||
std::transform(&X[0],&X[N],&Y[0],&Y[0],somme(coef));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@ -1,39 +0,0 @@
|
||||
//=====================================================
|
||||
// File : main.cpp
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:23 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#include "utilities.h"
|
||||
#include "STL_algo_interface.hh"
|
||||
#include "bench.hh"
|
||||
#include "action_atv_product.hh"
|
||||
#include "action_axpy.hh"
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
|
||||
bench<Action_atv_product<STL_algo_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||
|
||||
bench<Action_axpy<STL_algo_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1,6 +0,0 @@
|
||||
if(CMAKE_MINOR_VERSION GREATER 4)
|
||||
if(NOT MSVC)
|
||||
enable_language(Fortran)
|
||||
endif(NOT MSVC)
|
||||
btl_add_bench(btl_f77 main.cpp dmxv.f smxv.f dmxm.f smxm.f daxpy.f saxpy.f data.f sata.f daat.f saat.f OFF)
|
||||
endif(CMAKE_MINOR_VERSION GREATER 4)
|
@ -1,14 +0,0 @@
|
||||
SUBROUTINE DAAT(A,X,N)
|
||||
**
|
||||
** X = AT * A
|
||||
REAL*8 A(N,N),X(N,N),R
|
||||
DO 20 I=1,N
|
||||
DO 20 J=1,N
|
||||
R=0.
|
||||
DO 10 K=1,N
|
||||
R=R+A(I,K)*A(J,K)
|
||||
10 CONTINUE
|
||||
X(I,J)=R
|
||||
20 CONTINUE
|
||||
RETURN
|
||||
END
|
@ -1,14 +0,0 @@
|
||||
SUBROUTINE DATA(A,X,N)
|
||||
**
|
||||
** X = AT * A
|
||||
REAL*8 A(N,N),X(N,N),R
|
||||
DO 20 I=1,N
|
||||
DO 20 J=1,N
|
||||
R=0.
|
||||
DO 10 K=1,N
|
||||
R=R+A(K,I)*A(K,J)
|
||||
10 CONTINUE
|
||||
X(I,J)=R
|
||||
20 CONTINUE
|
||||
RETURN
|
||||
END
|
@ -1,18 +0,0 @@
|
||||
SUBROUTINE DAXPYF(N,A,X,Y)
|
||||
** ***************************************
|
||||
** CALCULE Y = Y + A*X
|
||||
** ***************************************
|
||||
*>N NOMBRE D'OPERATIONS A FAIRE
|
||||
*>A CONSTANTE MULTIPLICATIVE
|
||||
*>X TABLEAU
|
||||
*=Y TABLEAU DES RESULTATS
|
||||
*A R. SANCHEZ ( EARLY WINTER 1987 )
|
||||
*V M.F. ROBEAU
|
||||
REAL*8 X(1),Y(1)
|
||||
REAL*8 A
|
||||
DO 10 I=1,N
|
||||
Y(I)=Y(I)+A*X(I)
|
||||
10 CONTINUE
|
||||
RETURN
|
||||
END
|
||||
|
@ -1,32 +0,0 @@
|
||||
SUBROUTINE DMXM(A,N,B,M,C,L)
|
||||
**
|
||||
** C = A * B
|
||||
** A ET B MATRICES A(N,M) B(M,L) ==> C(N,L)
|
||||
**
|
||||
*>A PREMIERE MATRICE
|
||||
*>N PREMIERE DIMENSION DE A ET DE C
|
||||
*>B DEUXIEME MATRICE
|
||||
*>M DEUXIEME DIMENSION DE A ET PERMIERE DE B
|
||||
*<C MATRICE PRODUIT DE A ET DE B
|
||||
*>L DEUXIEME DIMENSION DE B ET DE C
|
||||
*A R. SANCHEZ ( EARLY WINTER 1987 )
|
||||
*V M.F. ROBEAU
|
||||
*M AM BAUDRON - AVRIL 94
|
||||
*: ERREUR DANS L'APPEL A L'UTILITAIRE SGEMM
|
||||
*: APPEL A L'UTILITAIRE SGEMM DE LA LIBRAIRIE BLAS SUR HP
|
||||
*M AM BAUDRON - NOVEMBRE 1991
|
||||
*: ERREUR ( SOMME SUR LES TERMES PAS FAITE )
|
||||
*: APPEL A L'UTILITAIRE SGEMM DE LA LIBRAIRIE BLAS SUR RISC
|
||||
*M AM BAUDRON - MAI 1993
|
||||
*: CHANGEMENT DES %IF LOCAL SUN MIPS SUITE A INTRODUCTION VERSION IBM
|
||||
REAL*8 A(N,M),B(M,L),C(N,L),R
|
||||
DO 20 I=1,N
|
||||
DO 20 J=1,L
|
||||
R=0.
|
||||
DO 10 K=1,M
|
||||
R=R+A(I,K)*B(K,J)
|
||||
10 CONTINUE
|
||||
C(I,J)=R
|
||||
20 CONTINUE
|
||||
RETURN
|
||||
END
|
@ -1,36 +0,0 @@
|
||||
|
||||
SUBROUTINE DMXM(A,N,B,M,C,L)
|
||||
**
|
||||
** C = A * B
|
||||
** A ET B MATRICES A(N,M) B(M,L) ==> C(N,L)
|
||||
**
|
||||
*>A PREMIERE MATRICE
|
||||
*>N PREMIERE DIMENSION DE A ET DE C
|
||||
*>B DEUXIEME MATRICE
|
||||
*>M DEUXIEME DIMENSION DE A ET PERMIERE DE B
|
||||
*<C MATRICE PRODUIT DE A ET DE B
|
||||
*>L DEUXIEME DIMENSION DE B ET DE C
|
||||
*A R. SANCHEZ ( EARLY WINTER 1987 )
|
||||
*V M.F. ROBEAU
|
||||
*M AM BAUDRON - AVRIL 94
|
||||
*: ERREUR DANS L'APPEL A L'UTILITAIRE SGEMM
|
||||
*: APPEL A L'UTILITAIRE SGEMM DE LA LIBRAIRIE BLAS SUR HP
|
||||
*M AM BAUDRON - NOVEMBRE 1991
|
||||
*: ERREUR ( SOMME SUR LES TERMES PAS FAITE )
|
||||
*: APPEL A L'UTILITAIRE SGEMM DE LA LIBRAIRIE BLAS SUR RISC
|
||||
*M AM BAUDRON - MAI 1993
|
||||
*: CHANGEMENT DES %IF LOCAL SUN MIPS SUITE A INTRODUCTION VERSION IBM
|
||||
REAL*8 A(N,M),B(M,L),C(N,L),R
|
||||
DO 5 J=1,L
|
||||
DO 5 I=1,N
|
||||
C(I,J)=0.
|
||||
5 CONTINUE
|
||||
DO 10 K=1,M
|
||||
DO 20 J=1,L
|
||||
R=B(K,J)
|
||||
DO 20 I=1,N
|
||||
C(I,J)=C(I,J)+A(I,K)*R
|
||||
20 CONTINUE
|
||||
10 CONTINUE
|
||||
RETURN
|
||||
END
|
@ -1,39 +0,0 @@
|
||||
SUBROUTINE DMXV(A,N,X,M,R)
|
||||
C
|
||||
**
|
||||
** VERSION DOUBLE PRECISION DE MXV
|
||||
** R = A * X
|
||||
** A MATRICE A(N,M)
|
||||
** R ET X VECTEURS
|
||||
**
|
||||
*>A PREMIERE MATRICE
|
||||
*>N PREMIERE DIMENSION DE A
|
||||
*>X VECTEUR
|
||||
*>M DEUXIEME DIMENSION DE A
|
||||
*<R VECTEUR PRODUIT DE A ET DE X
|
||||
**
|
||||
*A M. COSTE
|
||||
*V M.F. ROBEAU
|
||||
*M
|
||||
*
|
||||
REAL*8 X(1),R(1),A(N,M)
|
||||
REAL*8 S
|
||||
C DO 20 I=1,N
|
||||
C S=0.
|
||||
C DO 10 J=1,M
|
||||
C S=S+A(I,J)*X(J)
|
||||
C 10 CONTINUE
|
||||
C R(I)=S
|
||||
C 20 CONTINUE
|
||||
DO 5 I=1,N
|
||||
R(I)=0
|
||||
5 CONTINUE
|
||||
DO 10 J=1,M
|
||||
S=X(J)
|
||||
DO 20 I=1,N
|
||||
R(I)=R(I)+A(I,J)*S
|
||||
20 CONTINUE
|
||||
10 CONTINUE
|
||||
RETURN
|
||||
END
|
||||
|
@ -1,129 +0,0 @@
|
||||
//=====================================================
|
||||
// File : f77_interface.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:24 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef F77_INTERFACE_HH
|
||||
#define F77_INTERFACE_HH
|
||||
#include "f77_interface_base.hh"
|
||||
#include <string>
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void dmxv_(double * A, int * N, double * X, int * M, double *R);
|
||||
void smxv_(float * A, int * N, float * X, int * M, float *R);
|
||||
|
||||
void dmxm_(double * A, int * N, double * B, int * M, double *C, int * K);
|
||||
void smxm_(float * A, int * N, float * B, int * M, float *C, int * K);
|
||||
|
||||
void data_(double * A, double *X, int * N);
|
||||
void sata_(float * A, float *X, int * N);
|
||||
|
||||
void daat_(double * A, double *X, int * N);
|
||||
void saat_(float * A, float *X, int * N);
|
||||
|
||||
void saxpyf_(int * N, float * coef, float * X, float *Y);
|
||||
void daxpyf_(int * N, double * coef, double * X, double *Y);
|
||||
}
|
||||
|
||||
template<class real>
|
||||
class f77_interface : public f77_interface_base<real>
|
||||
{
|
||||
public :
|
||||
|
||||
typedef typename f77_interface_base<real>::gene_matrix gene_matrix;
|
||||
typedef typename f77_interface_base<real>::gene_vector gene_vector;
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "f77";
|
||||
}
|
||||
|
||||
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
dmxv_(A,&N,B,&N,X);
|
||||
}
|
||||
|
||||
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
|
||||
{
|
||||
dmxm_(A,&N,B,&N,X,&N);
|
||||
}
|
||||
|
||||
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
data_(A,X,&N);
|
||||
}
|
||||
|
||||
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
daat_(A,X,&N);
|
||||
}
|
||||
|
||||
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
|
||||
{
|
||||
int one=1;
|
||||
daxpyf_(&N,&coef,X,Y);
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
class f77_interface<float> : public f77_interface_base<float>
|
||||
{
|
||||
public :
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "F77";
|
||||
}
|
||||
|
||||
|
||||
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
smxv_(A,&N,B,&N,X);
|
||||
}
|
||||
|
||||
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
|
||||
{
|
||||
smxm_(A,&N,B,&N,X,&N);
|
||||
}
|
||||
|
||||
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
sata_(A,X,&N);
|
||||
}
|
||||
|
||||
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
|
||||
{
|
||||
saat_(A,X,&N);
|
||||
}
|
||||
|
||||
|
||||
static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N)
|
||||
{
|
||||
saxpyf_(&N,&coef,X,Y);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -1,91 +0,0 @@
|
||||
//=====================================================
|
||||
// File : f77_interface_base.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:25 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef F77_INTERFACE_BASE_HH
|
||||
#define F77_INTERFACE_BASE_HH
|
||||
|
||||
#include "utilities.h"
|
||||
#include <vector>
|
||||
template<class real>
|
||||
class f77_interface_base{
|
||||
|
||||
public:
|
||||
|
||||
typedef real real_type ;
|
||||
typedef std::vector<real> stl_vector;
|
||||
typedef std::vector<stl_vector > stl_matrix;
|
||||
|
||||
typedef real * gene_matrix;
|
||||
typedef real * gene_vector;
|
||||
|
||||
static void free_matrix(gene_matrix & A, int N){
|
||||
delete A;
|
||||
}
|
||||
|
||||
static void free_vector(gene_vector & B){
|
||||
delete B;
|
||||
}
|
||||
|
||||
static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
|
||||
int N = A_stl.size();
|
||||
A = new real[N*N];
|
||||
for (int j=0;j<N;j++)
|
||||
for (int i=0;i<N;i++)
|
||||
A[i+N*j] = A_stl[j][i];
|
||||
}
|
||||
|
||||
static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
|
||||
int N = B_stl.size();
|
||||
B = new real[N];
|
||||
for (int i=0;i<N;i++)
|
||||
B[i] = B_stl[i];
|
||||
}
|
||||
|
||||
static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
|
||||
int N = B_stl.size();
|
||||
for (int i=0;i<N;i++)
|
||||
B_stl[i] = B[i];
|
||||
}
|
||||
|
||||
static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
|
||||
int N = A_stl.size();
|
||||
for (int j=0;j<N;j++){
|
||||
A_stl[j].resize(N);
|
||||
for (int i=0;i<N;i++)
|
||||
A_stl[j][i] = A[i+N*j];
|
||||
}
|
||||
}
|
||||
|
||||
static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
|
||||
for (int i=0;i<N;i++)
|
||||
cible[i]=source[i];
|
||||
}
|
||||
|
||||
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
|
||||
for (int j=0;j<N;j++){
|
||||
for (int i=0;i<N;i++){
|
||||
cible[i+N*j] = source[i+N*j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
@ -1,47 +0,0 @@
|
||||
//=====================================================
|
||||
// File : main.cpp
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:25 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#include "utilities.h"
|
||||
#include "f77_interface.hh"
|
||||
#include "bench.hh"
|
||||
#include "action_matrix_vector_product.hh"
|
||||
#include "action_matrix_matrix_product.hh"
|
||||
#include "action_axpy.hh"
|
||||
#include "action_lu_solve.hh"
|
||||
#include "action_ata_product.hh"
|
||||
#include "action_aat_product.hh"
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
bench<Action_axpy<f77_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||
|
||||
bench<Action_matrix_vector_product<f77_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||
|
||||
bench<Action_matrix_matrix_product<f77_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
bench<Action_ata_product<f77_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
bench<Action_aat_product<f77_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1,14 +0,0 @@
|
||||
SUBROUTINE SAAT(A,X,N)
|
||||
**
|
||||
** X = AT * A
|
||||
REAL*4 A(N,N),X(N,N)
|
||||
DO 20 I=1,N
|
||||
DO 20 J=1,N
|
||||
R=0.
|
||||
DO 10 K=1,N
|
||||
R=R+A(I,K)*A(J,K)
|
||||
10 CONTINUE
|
||||
X(I,J)=R
|
||||
20 CONTINUE
|
||||
RETURN
|
||||
END
|
@ -1,14 +0,0 @@
|
||||
SUBROUTINE SATA(A,X,N)
|
||||
**
|
||||
** X = AT * A
|
||||
REAL*4 A(N,N),X(N,N)
|
||||
DO 20 I=1,N
|
||||
DO 20 J=1,N
|
||||
R=0.
|
||||
DO 10 K=1,N
|
||||
R=R+A(K,I)*A(K,J)
|
||||
10 CONTINUE
|
||||
X(I,J)=R
|
||||
20 CONTINUE
|
||||
RETURN
|
||||
END
|
@ -1,16 +0,0 @@
|
||||
SUBROUTINE SAXPYF(N,A,X,Y)
|
||||
** ***************************************
|
||||
** CALCULE Y = Y + A*X
|
||||
** ***************************************
|
||||
*>N NOMBRE D'OPERATIONS A FAIRE
|
||||
*>A CONSTANTE MULTIPLICATIVE
|
||||
*>X TABLEAU
|
||||
*=Y TABLEAU DES RESULTATS
|
||||
*A R. SANCHEZ ( EARLY WINTER 1987 )
|
||||
*V M.F. ROBEAU
|
||||
DIMENSION X(1),Y(1)
|
||||
DO 10 I=1,N
|
||||
Y(I)=Y(I)+A*X(I)
|
||||
10 CONTINUE
|
||||
RETURN
|
||||
END
|
@ -1,32 +0,0 @@
|
||||
SUBROUTINE SMXM(A,N,B,M,C,L)
|
||||
**
|
||||
** C = A * B
|
||||
** A ET B MATRICES A(N,M) B(M,L) ==> C(N,L)
|
||||
**
|
||||
*>A PREMIERE MATRICE
|
||||
*>N PREMIERE DIMENSION DE A ET DE C
|
||||
*>B DEUXIEME MATRICE
|
||||
*>M DEUXIEME DIMENSION DE A ET PERMIERE DE B
|
||||
*<C MATRICE PRODUIT DE A ET DE B
|
||||
*>L DEUXIEME DIMENSION DE B ET DE C
|
||||
*A R. SANCHEZ ( EARLY WINTER 1987 )
|
||||
*V M.F. ROBEAU
|
||||
*M AM BAUDRON - AVRIL 94
|
||||
*: ERREUR DANS L'APPEL A L'UTILITAIRE SGEMM
|
||||
*: APPEL A L'UTILITAIRE SGEMM DE LA LIBRAIRIE BLAS SUR HP
|
||||
*M AM BAUDRON - NOVEMBRE 1991
|
||||
*: ERREUR ( SOMME SUR LES TERMES PAS FAITE )
|
||||
*: APPEL A L'UTILITAIRE SGEMM DE LA LIBRAIRIE BLAS SUR RISC
|
||||
*M AM BAUDRON - MAI 1993
|
||||
*: CHANGEMENT DES %IF LOCAL SUN MIPS SUITE A INTRODUCTION VERSION IBM
|
||||
DIMENSION A(N,M),B(M,L),C(N,L)
|
||||
DO 20 I=1,N
|
||||
DO 20 J=1,L
|
||||
R=0.
|
||||
DO 10 K=1,M
|
||||
R=R+A(I,K)*B(K,J)
|
||||
10 CONTINUE
|
||||
C(I,J)=R
|
||||
20 CONTINUE
|
||||
RETURN
|
||||
END
|
@ -1,38 +0,0 @@
|
||||
SUBROUTINE SMXV(A,N,X,M,R)
|
||||
C
|
||||
**
|
||||
** VERSION DOUBLE PRECISION DE MXV
|
||||
** R = A * X
|
||||
** A MATRICE A(N,M)
|
||||
** R ET X VECTEURS
|
||||
**
|
||||
*>A PREMIERE MATRICE
|
||||
*>N PREMIERE DIMENSION DE A
|
||||
*>X VECTEUR
|
||||
*>M DEUXIEME DIMENSION DE A
|
||||
*<R VECTEUR PRODUIT DE A ET DE X
|
||||
**
|
||||
*A M. COSTE
|
||||
*V M.F. ROBEAU
|
||||
*M
|
||||
*
|
||||
REAL*4 X(1),R(1),A(N,M)
|
||||
REAL*4 S
|
||||
C DO 20 I=1,N
|
||||
C S=0.
|
||||
C DO 10 J=1,M
|
||||
C S=S+A(I,J)*X(J)
|
||||
C 10 CONTINUE
|
||||
C R(I)=S
|
||||
C 20 CONTINUE
|
||||
DO 5 I=1,N
|
||||
R(I)=0
|
||||
5 CONTINUE
|
||||
DO 10 J=1,M
|
||||
S=X(J)
|
||||
DO 20 I=1,N
|
||||
R(I)=R(I)+A(I,J)*S
|
||||
20 CONTINUE
|
||||
10 CONTINUE
|
||||
RETURN
|
||||
END
|
@ -1,36 +0,0 @@
|
||||
//=====================================================
|
||||
// File : test_interface.hh
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:25 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef TEST_INTERFACE_HH
|
||||
#define TEST_INTERFACE_HH
|
||||
|
||||
template< class Interface >
|
||||
void test_interface( void ){
|
||||
|
||||
Interface::interface_name();
|
||||
|
||||
typename Interface::gene_matrix A;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,12 +0,0 @@
|
||||
find_package(Eigen2)
|
||||
if (EIGEN2_FOUND)
|
||||
|
||||
include_directories(${EIGEN2_INCLUDE_DIR} ${PROJECT_SOURCE_DIR}/libs/f77)
|
||||
btl_add_bench(btl_hand_vec main.cpp OFF)
|
||||
|
||||
btl_add_bench(btl_hand_peeling main.cpp OFF)
|
||||
if (BUILD_btl_hand_peeling)
|
||||
set_target_properties(btl_hand_peeling PROPERTIES COMPILE_FLAGS "-DPEELING")
|
||||
endif (BUILD_btl_hand_peeling)
|
||||
|
||||
endif (EIGEN2_FOUND)
|
@ -1,886 +0,0 @@
|
||||
//=====================================================
|
||||
// File : hand_vec_interface.hh
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef HAND_VEC_INTERFACE_HH
|
||||
#define HAND_VEC_INTERFACE_HH
|
||||
|
||||
#include <Eigen/Core>
|
||||
#include "f77_interface.hh"
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
template<class real>
|
||||
class hand_vec_interface : public f77_interface_base<real> {
|
||||
|
||||
public :
|
||||
|
||||
typedef typename internal::packet_traits<real>::type Packet;
|
||||
static const int PacketSize = internal::packet_traits<real>::size;
|
||||
|
||||
typedef typename f77_interface_base<real>::stl_matrix stl_matrix;
|
||||
typedef typename f77_interface_base<real>::stl_vector stl_vector;
|
||||
typedef typename f77_interface_base<real>::gene_matrix gene_matrix;
|
||||
typedef typename f77_interface_base<real>::gene_vector gene_vector;
|
||||
|
||||
static void free_matrix(gene_matrix & A, int N){
|
||||
internal::aligned_free(A);
|
||||
}
|
||||
|
||||
static void free_vector(gene_vector & B){
|
||||
internal::aligned_free(B);
|
||||
}
|
||||
|
||||
static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
|
||||
int N = A_stl.size();
|
||||
A = (real*)internal::aligned_malloc(N*N*sizeof(real));
|
||||
for (int j=0;j<N;j++)
|
||||
for (int i=0;i<N;i++)
|
||||
A[i+N*j] = A_stl[j][i];
|
||||
}
|
||||
|
||||
static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
|
||||
int N = B_stl.size();
|
||||
B = (real*)internal::aligned_malloc(N*sizeof(real));
|
||||
for (int i=0;i<N;i++)
|
||||
B[i] = B_stl[i];
|
||||
}
|
||||
|
||||
static inline std::string name() {
|
||||
#ifdef PEELING
|
||||
return "hand_vectorized_peeling";
|
||||
#else
|
||||
return "hand_vectorized";
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
asm("#begin matrix_vector_product");
|
||||
int AN = (N/PacketSize)*PacketSize;
|
||||
int ANP = (AN/(2*PacketSize))*2*PacketSize;
|
||||
int bound = (N/4)*4;
|
||||
for (int i=0;i<N;i++)
|
||||
X[i] = 0;
|
||||
|
||||
for (int i=0;i<bound;i+=4)
|
||||
{
|
||||
register real* __restrict__ A0 = A + i*N;
|
||||
register real* __restrict__ A1 = A + (i+1)*N;
|
||||
register real* __restrict__ A2 = A + (i+2)*N;
|
||||
register real* __restrict__ A3 = A + (i+3)*N;
|
||||
|
||||
Packet ptmp0 = internal::pset1(B[i]);
|
||||
Packet ptmp1 = internal::pset1(B[i+1]);
|
||||
Packet ptmp2 = internal::pset1(B[i+2]);
|
||||
Packet ptmp3 = internal::pset1(B[i+3]);
|
||||
// register Packet ptmp0, ptmp1, ptmp2, ptmp3;
|
||||
// asm(
|
||||
//
|
||||
// "movss (%[B],%[j],4), %[ptmp0] \n\t"
|
||||
// "shufps $0,%[ptmp0],%[ptmp0] \n\t"
|
||||
// "movss 4(%[B],%[j],4), %[ptmp1] \n\t"
|
||||
// "shufps $0,%[ptmp1],%[ptmp1] \n\t"
|
||||
// "movss 8(%[B],%[j],4), %[ptmp2] \n\t"
|
||||
// "shufps $0,%[ptmp2],%[ptmp2] \n\t"
|
||||
// "movss 12(%[B],%[j],4), %[ptmp3] \n\t"
|
||||
// "shufps $0,%[ptmp3],%[ptmp3] \n\t"
|
||||
// : [ptmp0] "=x" (ptmp0),
|
||||
// [ptmp1] "=x" (ptmp1),
|
||||
// [ptmp2] "=x" (ptmp2),
|
||||
// [ptmp3] "=x" (ptmp3)
|
||||
// : [B] "r" (B),
|
||||
// [j] "r" (size_t(i))
|
||||
// : );
|
||||
|
||||
if (AN>0)
|
||||
{
|
||||
// for (size_t j = 0;j<ANP;j+=8)
|
||||
// {
|
||||
// asm(
|
||||
//
|
||||
// "movaps (%[A0],%[j],4), %%xmm8 \n\t"
|
||||
// "movaps 16(%[A0],%[j],4), %%xmm12 \n\t"
|
||||
// "movups (%[A3],%[j],4), %%xmm11 \n\t"
|
||||
// "movups 16(%[A3],%[j],4), %%xmm15 \n\t"
|
||||
// "movups (%[A2],%[j],4), %%xmm10 \n\t"
|
||||
// "movups 16(%[A2],%[j],4), %%xmm14 \n\t"
|
||||
// "movups (%[A1],%[j],4), %%xmm9 \n\t"
|
||||
// "movups 16(%[A1],%[j],4), %%xmm13 \n\t"
|
||||
//
|
||||
// "mulps %[ptmp0], %%xmm8 \n\t"
|
||||
// "addps (%[res0],%[j],4), %%xmm8 \n\t"
|
||||
// "mulps %[ptmp3], %%xmm11 \n\t"
|
||||
// "addps %%xmm11, %%xmm8 \n\t"
|
||||
// "mulps %[ptmp2], %%xmm10 \n\t"
|
||||
// "addps %%xmm10, %%xmm8 \n\t"
|
||||
// "mulps %[ptmp1], %%xmm9 \n\t"
|
||||
// "addps %%xmm9, %%xmm8 \n\t"
|
||||
// "movaps %%xmm8, (%[res0],%[j],4) \n\t"
|
||||
//
|
||||
// "mulps %[ptmp0], %%xmm12 \n\t"
|
||||
// "addps 16(%[res0],%[j],4), %%xmm12 \n\t"
|
||||
// "mulps %[ptmp3], %%xmm15 \n\t"
|
||||
// "addps %%xmm15, %%xmm12 \n\t"
|
||||
// "mulps %[ptmp2], %%xmm14 \n\t"
|
||||
// "addps %%xmm14, %%xmm12 \n\t"
|
||||
// "mulps %[ptmp1], %%xmm13 \n\t"
|
||||
// "addps %%xmm13, %%xmm12 \n\t"
|
||||
// "movaps %%xmm12, 16(%[res0],%[j],4) \n\t"
|
||||
// :
|
||||
// : [res0] "r" (X), [j] "r" (j),[A0] "r" (A0),
|
||||
// [A1] "r" (A1),
|
||||
// [A2] "r" (A2),
|
||||
// [A3] "r" (A3),
|
||||
// [ptmp0] "x" (ptmp0),
|
||||
// [ptmp1] "x" (ptmp1),
|
||||
// [ptmp2] "x" (ptmp2),
|
||||
// [ptmp3] "x" (ptmp3)
|
||||
// : "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "%r14");
|
||||
// }
|
||||
register Packet A00;
|
||||
register Packet A01;
|
||||
register Packet A02;
|
||||
register Packet A03;
|
||||
register Packet A10;
|
||||
register Packet A11;
|
||||
register Packet A12;
|
||||
register Packet A13;
|
||||
for (int j = 0;j<ANP;j+=2*PacketSize)
|
||||
{
|
||||
// A00 = internal::pload(&A0[j]);
|
||||
// A01 = internal::ploadu(&A1[j]);
|
||||
// A02 = internal::ploadu(&A2[j]);
|
||||
// A03 = internal::ploadu(&A3[j]);
|
||||
// A10 = internal::pload(&A0[j+PacketSize]);
|
||||
// A11 = internal::ploadu(&A1[j+PacketSize]);
|
||||
// A12 = internal::ploadu(&A2[j+PacketSize]);
|
||||
// A13 = internal::ploadu(&A3[j+PacketSize]);
|
||||
//
|
||||
// A00 = internal::pmul(ptmp0, A00);
|
||||
// A01 = internal::pmul(ptmp1, A01);
|
||||
// A02 = internal::pmul(ptmp2, A02);
|
||||
// A03 = internal::pmul(ptmp3, A03);
|
||||
// A10 = internal::pmul(ptmp0, A10);
|
||||
// A11 = internal::pmul(ptmp1, A11);
|
||||
// A12 = internal::pmul(ptmp2, A12);
|
||||
// A13 = internal::pmul(ptmp3, A13);
|
||||
//
|
||||
// A00 = internal::padd(A00,A01);
|
||||
// A02 = internal::padd(A02,A03);
|
||||
// A00 = internal::padd(A00,internal::pload(&X[j]));
|
||||
// A00 = internal::padd(A00,A02);
|
||||
// internal::pstore(&X[j],A00);
|
||||
//
|
||||
// A10 = internal::padd(A10,A11);
|
||||
// A12 = internal::padd(A12,A13);
|
||||
// A10 = internal::padd(A10,internal::pload(&X[j+PacketSize]));
|
||||
// A10 = internal::padd(A10,A12);
|
||||
// internal::pstore(&X[j+PacketSize],A10);
|
||||
|
||||
internal::pstore(&X[j],
|
||||
internal::padd(internal::pload(&X[j]),
|
||||
internal::padd(
|
||||
internal::padd(internal::pmul(ptmp0,internal::pload(&A0[j])),internal::pmul(ptmp1,internal::ploadu(&A1[j]))),
|
||||
internal::padd(internal::pmul(ptmp2,internal::ploadu(&A2[j])),internal::pmul(ptmp3,internal::ploadu(&A3[j]))) )));
|
||||
|
||||
internal::pstore(&X[j+PacketSize],
|
||||
internal::padd(internal::pload(&X[j+PacketSize]),
|
||||
internal::padd(
|
||||
internal::padd(internal::pmul(ptmp0,internal::pload(&A0[j+PacketSize])),internal::pmul(ptmp1,internal::ploadu(&A1[j+PacketSize]))),
|
||||
internal::padd(internal::pmul(ptmp2,internal::ploadu(&A2[j+PacketSize])),internal::pmul(ptmp3,internal::ploadu(&A3[j+PacketSize]))) )));
|
||||
}
|
||||
for (int j = ANP;j<AN;j+=PacketSize)
|
||||
internal::pstore(&X[j],
|
||||
internal::padd(internal::pload(&X[j]),
|
||||
internal::padd(
|
||||
internal::padd(internal::pmul(ptmp0,internal::pload(&A0[j])),internal::pmul(ptmp1,internal::ploadu(&A1[j]))),
|
||||
internal::padd(internal::pmul(ptmp2,internal::ploadu(&A2[j])),internal::pmul(ptmp3,internal::ploadu(&A3[j]))) )));
|
||||
}
|
||||
// process remaining scalars
|
||||
for (int j=AN;j<N;j++)
|
||||
X[j] += internal::pfirst(ptmp0) * A0[j] + internal::pfirst(ptmp1) * A1[j] + internal::pfirst(ptmp2) * A2[j] + internal::pfirst(ptmp3) * A3[j];
|
||||
}
|
||||
for (int i=bound;i<N;i++)
|
||||
{
|
||||
real tmp0 = B[i];
|
||||
Packet ptmp0 = internal::pset1(tmp0);
|
||||
int iN0 = i*N;
|
||||
if (AN>0)
|
||||
{
|
||||
bool aligned0 = (iN0 % PacketSize) == 0;
|
||||
if (aligned0)
|
||||
for (int j = 0;j<AN;j+=PacketSize)
|
||||
internal::pstore(&X[j], internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+iN0])),internal::pload(&X[j])));
|
||||
else
|
||||
for (int j = 0;j<AN;j+=PacketSize)
|
||||
internal::pstore(&X[j], internal::padd(internal::pmul(ptmp0,internal::ploadu(&A[j+iN0])),internal::pload(&X[j])));
|
||||
}
|
||||
// process remaining scalars
|
||||
for (int j=AN;j<N;j++)
|
||||
X[j] += tmp0 * A[j+iN0];
|
||||
}
|
||||
asm("#end matrix_vector_product");
|
||||
}
|
||||
|
||||
static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
|
||||
// int AN = (N/PacketSize)*PacketSize;
|
||||
// int ANP = (AN/(2*PacketSize))*2*PacketSize;
|
||||
// int bound = (N/4)*4;
|
||||
for (int i=0;i<N;i++)
|
||||
X[i] = 0;
|
||||
|
||||
int bound = std::max(0,N-8) & 0xfffffffE;
|
||||
|
||||
for (int j=0;j<bound;j+=2)
|
||||
{
|
||||
register real* __restrict__ A0 = A + j*N;
|
||||
register real* __restrict__ A1 = A + (j+1)*N;
|
||||
|
||||
real t0 = B[j];
|
||||
Packet ptmp0 = internal::pset1(t0);
|
||||
real t1 = B[j+1];
|
||||
Packet ptmp1 = internal::pset1(t1);
|
||||
|
||||
real t2 = 0;
|
||||
Packet ptmp2 = internal::pset1(t2);
|
||||
real t3 = 0;
|
||||
Packet ptmp3 = internal::pset1(t3);
|
||||
|
||||
int starti = j+2;
|
||||
int alignedEnd = starti;
|
||||
int alignedStart = (starti) + internal::first_aligned(&X[starti], N-starti);
|
||||
alignedEnd = alignedStart + ((N-alignedStart)/(PacketSize))*(PacketSize);
|
||||
|
||||
X[j] += t0 * A0[j];
|
||||
X[j+1] += t1 * A1[j];
|
||||
|
||||
X[j+1] += t0 * A0[j+1];
|
||||
t2 += A0[j+1] * B[j+1];
|
||||
|
||||
// alignedStart = alignedEnd;
|
||||
for (int i=starti; i<alignedStart; ++i) {
|
||||
X[i] += t0 * A0[i] + t1 * A1[i];
|
||||
t2 += A0[i] * B[i];
|
||||
t3 += A1[i] * B[i];
|
||||
}
|
||||
asm("#begin symv");
|
||||
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize) {
|
||||
Packet A0i = internal::ploadu(&A0[i]);
|
||||
Packet A1i = internal::ploadu(&A1[i]);
|
||||
// Packet A0i1 = internal::ploadu(&A0[i+PacketSize]);
|
||||
Packet Xi = internal::pload(&X[i]);
|
||||
Packet Bi = internal::pload/*u*/(&B[i]);
|
||||
// Packet Xi1 = internal::pload(&X[i+PacketSize]);
|
||||
// Packet Bi1 = internal::pload/*u*/(&B[i+PacketSize]);
|
||||
Xi = internal::padd(internal::padd(Xi, internal::pmul(ptmp0, A0i)), internal::pmul(ptmp1, A1i));
|
||||
ptmp2 = internal::padd(ptmp2, internal::pmul(A0i, Bi));
|
||||
ptmp3 = internal::padd(ptmp3, internal::pmul(A1i, Bi));
|
||||
// Xi1 = internal::padd(Xi1, internal::pmul(ptmp1, A0i1));
|
||||
// ptmp2 = internal::padd(ptmp2, internal::pmul(A0i1, Bi1));
|
||||
//
|
||||
internal::pstore(&X[i],Xi);
|
||||
// internal::pstore(&X[i+PacketSize],Xi1);
|
||||
// asm(
|
||||
// "prefetchnta 64(%[A0],%[i],4) \n\t"
|
||||
// //"movups (%[A0],%[i],4), %%xmm8 \n\t"
|
||||
// "movsd (%[A0],%[i],4), %%xmm8 \n\t"
|
||||
// "movhps 8(%[A0],%[i],4), %%xmm8 \n\t"
|
||||
// // "movups 16(%[A0],%[i],4), %%xmm9 \n\t"
|
||||
// // "movups 64(%[A0],%[i],4), %%xmm15 \n\t"
|
||||
// "movaps (%[B], %[i],4), %%xmm12 \n\t"
|
||||
// // "movaps 16(%[B], %[i],4), %%xmm13 \n\t"
|
||||
// "movaps (%[X], %[i],4), %%xmm10 \n\t"
|
||||
// // "movaps 16(%[X], %[i],4), %%xmm11 \n\t"
|
||||
//
|
||||
// "mulps %%xmm8, %%xmm12 \n\t"
|
||||
// // "mulps %%xmm9, %%xmm13 \n\t"
|
||||
//
|
||||
// "mulps %[ptmp1], %%xmm8 \n\t"
|
||||
// "addps %%xmm12, %[ptmp2] \n\t"
|
||||
// "addps %%xmm8, %%xmm10 \n\t"
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
// // "mulps %[ptmp1], %%xmm9 \n\t"
|
||||
//
|
||||
// // "addps %%xmm9, %%xmm11 \n\t"
|
||||
// // "addps %%xmm13, %[ptmp2] \n\t"
|
||||
//
|
||||
// "movaps %%xmm10, (%[X],%[i],4) \n\t"
|
||||
// // "movaps %%xmm11, 16(%[X],%[i],4) \n\t"
|
||||
// :
|
||||
// : [X] "r" (X), [i] "r" (i), [A0] "r" (A0),
|
||||
// [B] "r" (B),
|
||||
// [ptmp1] "x" (ptmp1),
|
||||
// [ptmp2] "x" (ptmp2)
|
||||
// : "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm15");
|
||||
}
|
||||
asm("#end symv");
|
||||
for (int i=alignedEnd; i<N; i++) {
|
||||
X[i] += t0 * A0[i] + t1 * A1[i];
|
||||
t2 += A0[i] * B[i];
|
||||
t3 += A1[i] * B[i];
|
||||
}
|
||||
|
||||
|
||||
X[j] += t2 + internal::predux(ptmp2);
|
||||
X[j+1] += t3 + internal::predux(ptmp3);
|
||||
}
|
||||
for (int j=bound;j<N;j++)
|
||||
{
|
||||
register real* __restrict__ A0 = A + j*N;
|
||||
|
||||
real t1 = B[j];
|
||||
real t2 = 0;
|
||||
X[j] += t1 * A0[j];
|
||||
for (int i=j+1; i<N; i+=PacketSize) {
|
||||
X[i] += t1 * A0[i];
|
||||
t2 += A0[i] * B[i];
|
||||
}
|
||||
X[j] += t2;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
// {
|
||||
// asm("#begin matrix_vector_product");
|
||||
// int AN = (N/PacketSize)*PacketSize;
|
||||
// int ANP = (AN/(2*PacketSize))*2*PacketSize;
|
||||
// int bound = (N/4)*4;
|
||||
// for (int i=0;i<N;i++)
|
||||
// X[i] = 0;
|
||||
//
|
||||
// for (int i=0;i<bound;i+=4)
|
||||
// {
|
||||
// real tmp0 = B[i];
|
||||
// Packet ptmp0 = internal::pset1(tmp0);
|
||||
// real tmp1 = B[i+1];
|
||||
// Packet ptmp1 = internal::pset1(tmp1);
|
||||
// real tmp2 = B[i+2];
|
||||
// Packet ptmp2 = internal::pset1(tmp2);
|
||||
// real tmp3 = B[i+3];
|
||||
// Packet ptmp3 = internal::pset1(tmp3);
|
||||
// int iN0 = i*N;
|
||||
// int iN1 = (i+1)*N;
|
||||
// int iN2 = (i+2)*N;
|
||||
// int iN3 = (i+3)*N;
|
||||
// if (AN>0)
|
||||
// {
|
||||
// // int aligned0 = (iN0 % PacketSize);
|
||||
// int aligned1 = (iN1 % PacketSize);
|
||||
//
|
||||
// if (aligned1==0)
|
||||
// {
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// {
|
||||
// internal::pstore(&X[j],
|
||||
// internal::padd(internal::pload(&X[j]),
|
||||
// internal::padd(
|
||||
// internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+iN0])),internal::pmul(ptmp1,internal::pload(&A[j+iN1]))),
|
||||
// internal::padd(internal::pmul(ptmp2,internal::pload(&A[j+iN2])),internal::pmul(ptmp3,internal::pload(&A[j+iN3]))) )));
|
||||
// }
|
||||
// }
|
||||
// else if (aligned1==2)
|
||||
// {
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// {
|
||||
// internal::pstore(&X[j],
|
||||
// internal::padd(internal::pload(&X[j]),
|
||||
// internal::padd(
|
||||
// internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+iN0])),internal::pmul(ptmp1,internal::ploadu(&A[j+iN1]))),
|
||||
// internal::padd(internal::pmul(ptmp2,internal::pload(&A[j+iN2])),internal::pmul(ptmp3,internal::ploadu(&A[j+iN3]))) )));
|
||||
// }
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// for (int j = 0;j<ANP;j+=2*PacketSize)
|
||||
// {
|
||||
// internal::pstore(&X[j],
|
||||
// internal::padd(internal::pload(&X[j]),
|
||||
// internal::padd(
|
||||
// internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+iN0])),internal::pmul(ptmp1,internal::ploadu(&A[j+iN1]))),
|
||||
// internal::padd(internal::pmul(ptmp2,internal::ploadu(&A[j+iN2])),internal::pmul(ptmp3,internal::ploadu(&A[j+iN3]))) )));
|
||||
//
|
||||
// internal::pstore(&X[j+PacketSize],
|
||||
// internal::padd(internal::pload(&X[j+PacketSize]),
|
||||
// internal::padd(
|
||||
// internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+PacketSize+iN0])),internal::pmul(ptmp1,internal::ploadu(&A[j+PacketSize+iN1]))),
|
||||
// internal::padd(internal::pmul(ptmp2,internal::ploadu(&A[j+PacketSize+iN2])),internal::pmul(ptmp3,internal::ploadu(&A[j+PacketSize+iN3]))) )));
|
||||
//
|
||||
// // internal::pstore(&X[j+2*PacketSize],
|
||||
// // internal::padd(internal::pload(&X[j+2*PacketSize]),
|
||||
// // internal::padd(
|
||||
// // internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+2*PacketSize+iN0])),internal::pmul(ptmp1,internal::ploadu(&A[j+2*PacketSize+iN1]))),
|
||||
// // internal::padd(internal::pmul(ptmp2,internal::ploadu(&A[j+2*PacketSize+iN2])),internal::pmul(ptmp3,internal::ploadu(&A[j+2*PacketSize+iN3]))) )));
|
||||
// //
|
||||
// // internal::pstore(&X[j+3*PacketSize],
|
||||
// // internal::padd(internal::pload(&X[j+3*PacketSize]),
|
||||
// // internal::padd(
|
||||
// // internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+3*PacketSize+iN0])),internal::pmul(ptmp1,internal::ploadu(&A[j+3*PacketSize+iN1]))),
|
||||
// // internal::padd(internal::pmul(ptmp2,internal::ploadu(&A[j+3*PacketSize+iN2])),internal::pmul(ptmp3,internal::ploadu(&A[j+3*PacketSize+iN3]))) )));
|
||||
//
|
||||
// }
|
||||
// for (int j = ANP;j<AN;j+=PacketSize)
|
||||
// internal::pstore(&X[j],
|
||||
// internal::padd(internal::pload(&X[j]),
|
||||
// internal::padd(
|
||||
// internal::padd(internal::pmul(ptmp0,internal::ploadu(&A[j+iN0])),internal::pmul(ptmp1,internal::ploadu(&A[j+iN1]))),
|
||||
// internal::padd(internal::pmul(ptmp2,internal::ploadu(&A[j+iN2])),internal::pmul(ptmp3,internal::ploadu(&A[j+iN3]))) )));
|
||||
// }
|
||||
// }
|
||||
// // process remaining scalars
|
||||
// for (int j=AN;j<N;j++)
|
||||
// X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1] + tmp2 * A[j+iN2] + tmp3 * A[j+iN3];
|
||||
// }
|
||||
// for (int i=bound;i<N;i++)
|
||||
// {
|
||||
// real tmp0 = B[i];
|
||||
// Packet ptmp0 = internal::pset1(tmp0);
|
||||
// int iN0 = i*N;
|
||||
// if (AN>0)
|
||||
// {
|
||||
// bool aligned0 = (iN0 % PacketSize) == 0;
|
||||
// if (aligned0)
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// internal::pstore(&X[j], internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+iN0])),internal::pload(&X[j])));
|
||||
// else
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// internal::pstore(&X[j], internal::padd(internal::pmul(ptmp0,internal::ploadu(&A[j+iN0])),internal::pload(&X[j])));
|
||||
// }
|
||||
// // process remaining scalars
|
||||
// for (int j=AN;j<N;j++)
|
||||
// X[j] += tmp0 * A[j+iN0];
|
||||
// }
|
||||
// asm("#end matrix_vector_product");
|
||||
// }
|
||||
|
||||
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
// {
|
||||
// asm("#begin matrix_vector_product");
|
||||
// int AN = (N/PacketSize)*PacketSize;
|
||||
// for (int i=0;i<N;i++)
|
||||
// X[i] = 0;
|
||||
//
|
||||
// for (int i=0;i<N;i+=2)
|
||||
// {
|
||||
// real tmp0 = B[i];
|
||||
// Packet ptmp0 = internal::pset1(tmp0);
|
||||
// real tmp1 = B[i+1];
|
||||
// Packet ptmp1 = internal::pset1(tmp1);
|
||||
// int iN0 = i*N;
|
||||
// int iN1 = (i+1)*N;
|
||||
// if (AN>0)
|
||||
// {
|
||||
// bool aligned0 = (iN0 % PacketSize) == 0;
|
||||
// bool aligned1 = (iN1 % PacketSize) == 0;
|
||||
//
|
||||
// if (aligned0 && aligned1)
|
||||
// {
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// {
|
||||
// internal::pstore(&X[j],
|
||||
// internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+iN0])),
|
||||
// internal::padd(internal::pmul(ptmp1,internal::pload(&A[j+iN1])),internal::pload(&X[j]))));
|
||||
// }
|
||||
// }
|
||||
// else if (aligned0)
|
||||
// {
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// {
|
||||
// internal::pstore(&X[j],
|
||||
// internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+iN0])),
|
||||
// internal::padd(internal::pmul(ptmp1,internal::ploadu(&A[j+iN1])),internal::pload(&X[j]))));
|
||||
// }
|
||||
// }
|
||||
// else if (aligned1)
|
||||
// {
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// {
|
||||
// internal::pstore(&X[j],
|
||||
// internal::padd(internal::pmul(ptmp0,internal::ploadu(&A[j+iN0])),
|
||||
// internal::padd(internal::pmul(ptmp1,internal::pload(&A[j+iN1])),internal::pload(&X[j]))));
|
||||
// }
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// int ANP = (AN/(4*PacketSize))*4*PacketSize;
|
||||
// for (int j = 0;j<ANP;j+=4*PacketSize)
|
||||
// {
|
||||
// internal::pstore(&X[j],
|
||||
// internal::padd(internal::pmul(ptmp0,internal::ploadu(&A[j+iN0])),
|
||||
// internal::padd(internal::pmul(ptmp1,internal::ploadu(&A[j+iN1])),internal::pload(&X[j]))));
|
||||
//
|
||||
// internal::pstore(&X[j+PacketSize],
|
||||
// internal::padd(internal::pmul(ptmp0,internal::ploadu(&A[j+PacketSize+iN0])),
|
||||
// internal::padd(internal::pmul(ptmp1,internal::ploadu(&A[j+PacketSize+iN1])),internal::pload(&X[j+PacketSize]))));
|
||||
//
|
||||
// internal::pstore(&X[j+2*PacketSize],
|
||||
// internal::padd(internal::pmul(ptmp0,internal::ploadu(&A[j+2*PacketSize+iN0])),
|
||||
// internal::padd(internal::pmul(ptmp1,internal::ploadu(&A[j+2*PacketSize+iN1])),internal::pload(&X[j+2*PacketSize]))));
|
||||
//
|
||||
// internal::pstore(&X[j+3*PacketSize],
|
||||
// internal::padd(internal::pmul(ptmp0,internal::ploadu(&A[j+3*PacketSize+iN0])),
|
||||
// internal::padd(internal::pmul(ptmp1,internal::ploadu(&A[j+3*PacketSize+iN1])),internal::pload(&X[j+3*PacketSize]))));
|
||||
// }
|
||||
// for (int j = ANP;j<AN;j+=PacketSize)
|
||||
// internal::pstore(&X[j],
|
||||
// internal::padd(internal::pmul(ptmp0,internal::ploadu(&A[j+iN0])),
|
||||
// internal::padd(internal::pmul(ptmp1,internal::ploadu(&A[j+iN1])),internal::pload(&X[j]))));
|
||||
// }
|
||||
// }
|
||||
// // process remaining scalars
|
||||
// for (int j=AN;j<N;j++)
|
||||
// X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1];
|
||||
// }
|
||||
// int remaining = (N/2)*2;
|
||||
// for (int i=remaining;i<N;i++)
|
||||
// {
|
||||
// real tmp0 = B[i];
|
||||
// Packet ptmp0 = internal::pset1(tmp0);
|
||||
// int iN0 = i*N;
|
||||
// if (AN>0)
|
||||
// {
|
||||
// bool aligned0 = (iN0 % PacketSize) == 0;
|
||||
// if (aligned0)
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// internal::pstore(&X[j], internal::padd(internal::pmul(ptmp0,internal::pload(&A[j+iN0])),internal::pload(&X[j])));
|
||||
// else
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// internal::pstore(&X[j], internal::padd(internal::pmul(ptmp0,internal::ploadu(&A[j+iN0])),internal::pload(&X[j])));
|
||||
// }
|
||||
// // process remaining scalars
|
||||
// for (int j=AN;j<N;j++)
|
||||
// X[j] += tmp0 * A[j+iN0];
|
||||
// }
|
||||
// asm("#end matrix_vector_product");
|
||||
// }
|
||||
|
||||
// static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
// {
|
||||
// asm("#begin matrix_vector_product");
|
||||
// int AN = (N/PacketSize)*PacketSize;
|
||||
// for (int i=0;i<N;i++)
|
||||
// X[i] = 0;
|
||||
// for (int i=0;i<N;i++)
|
||||
// {
|
||||
// real tmp = B[i];
|
||||
// Packet ptmp = internal::pset1(tmp);
|
||||
// int iN = i*N;
|
||||
// if (AN>0)
|
||||
// {
|
||||
// bool aligned = (iN % PacketSize) == 0;
|
||||
// if (aligned)
|
||||
// {
|
||||
// #ifdef PEELING
|
||||
// Packet A0, A1, A2, X0, X1, X2;
|
||||
// int ANP = (AN/(8*PacketSize))*8*PacketSize;
|
||||
// for (int j = 0;j<ANP;j+=PacketSize*8)
|
||||
// {
|
||||
// A0 = internal::pload(&A[j+iN]);
|
||||
// X0 = internal::pload(&X[j]);
|
||||
// A1 = internal::pload(&A[j+PacketSize+iN]);
|
||||
// X1 = internal::pload(&X[j+PacketSize]);
|
||||
// A2 = internal::pload(&A[j+2*PacketSize+iN]);
|
||||
// X2 = internal::pload(&X[j+2*PacketSize]);
|
||||
// internal::pstore(&X[j], internal::padd(X0, internal::pmul(ptmp,A0)));
|
||||
// A0 = internal::pload(&A[j+3*PacketSize+iN]);
|
||||
// X0 = internal::pload(&X[j+3*PacketSize]);
|
||||
// internal::pstore(&X[j+PacketSize], internal::padd(internal::pload(&X1), internal::pmul(ptmp,A1)));
|
||||
// A1 = internal::pload(&A[j+4*PacketSize+iN]);
|
||||
// X1 = internal::pload(&X[j+4*PacketSize]);
|
||||
// internal::pstore(&X[j+2*PacketSize], internal::padd(internal::pload(&X2), internal::pmul(ptmp,A2)));
|
||||
// A2 = internal::pload(&A[j+5*PacketSize+iN]);
|
||||
// X2 = internal::pload(&X[j+5*PacketSize]);
|
||||
// internal::pstore(&X[j+3*PacketSize], internal::padd(internal::pload(&X0), internal::pmul(ptmp,A0)));
|
||||
// A0 = internal::pload(&A[j+6*PacketSize+iN]);
|
||||
// X0 = internal::pload(&X[j+6*PacketSize]);
|
||||
// internal::pstore(&X[j+4*PacketSize], internal::padd(internal::pload(&X1), internal::pmul(ptmp,A1)));
|
||||
// A1 = internal::pload(&A[j+7*PacketSize+iN]);
|
||||
// X1 = internal::pload(&X[j+7*PacketSize]);
|
||||
// internal::pstore(&X[j+5*PacketSize], internal::padd(internal::pload(&X2), internal::pmul(ptmp,A2)));
|
||||
// internal::pstore(&X[j+6*PacketSize], internal::padd(internal::pload(&X0), internal::pmul(ptmp,A0)));
|
||||
// internal::pstore(&X[j+7*PacketSize], internal::padd(internal::pload(&X1), internal::pmul(ptmp,A1)));
|
||||
// //
|
||||
// // internal::pstore(&X[j], internal::padd(internal::pload(&X[j]), internal::pmul(ptmp,internal::pload(&A[j+iN]))));
|
||||
// // internal::pstore(&X[j+PacketSize], internal::padd(internal::pload(&X[j+PacketSize]), internal::pmul(ptmp,internal::pload(&A[j+PacketSize+iN]))));
|
||||
// // internal::pstore(&X[j+2*PacketSize], internal::padd(internal::pload(&X[j+2*PacketSize]), internal::pmul(ptmp,internal::pload(&A[j+2*PacketSize+iN]))));
|
||||
// // internal::pstore(&X[j+3*PacketSize], internal::padd(internal::pload(&X[j+3*PacketSize]), internal::pmul(ptmp,internal::pload(&A[j+3*PacketSize+iN]))));
|
||||
// // internal::pstore(&X[j+4*PacketSize], internal::padd(internal::pload(&X[j+4*PacketSize]), internal::pmul(ptmp,internal::pload(&A[j+4*PacketSize+iN]))));
|
||||
// // internal::pstore(&X[j+5*PacketSize], internal::padd(internal::pload(&X[j+5*PacketSize]), internal::pmul(ptmp,internal::pload(&A[j+5*PacketSize+iN]))));
|
||||
// // internal::pstore(&X[j+6*PacketSize], internal::padd(internal::pload(&X[j+6*PacketSize]), internal::pmul(ptmp,internal::pload(&A[j+6*PacketSize+iN]))));
|
||||
// // internal::pstore(&X[j+7*PacketSize], internal::padd(internal::pload(&X[j+7*PacketSize]), internal::pmul(ptmp,internal::pload(&A[j+7*PacketSize+iN]))));
|
||||
// }
|
||||
// for (int j = ANP;j<AN;j+=PacketSize)
|
||||
// internal::pstore(&X[j], internal::padd(internal::pload(&X[j]), internal::pmul(ptmp,internal::pload(&A[j+iN]))));
|
||||
// #else
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// internal::pstore(&X[j], internal::padd(internal::pload(&X[j]), internal::pmul(ptmp,internal::pload(&A[j+iN]))));
|
||||
// #endif
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// #ifdef PEELING
|
||||
// int ANP = (AN/(8*PacketSize))*8*PacketSize;
|
||||
// for (int j = 0;j<ANP;j+=PacketSize*8)
|
||||
// {
|
||||
// internal::pstore(&X[j], internal::padd(internal::pload(&X[j]), internal::pmul(ptmp,internal::ploadu(&A[j+iN]))));
|
||||
// internal::pstore(&X[j+PacketSize], internal::padd(internal::pload(&X[j+PacketSize]), internal::pmul(ptmp,internal::ploadu(&A[j+PacketSize+iN]))));
|
||||
// internal::pstore(&X[j+2*PacketSize], internal::padd(internal::pload(&X[j+2*PacketSize]), internal::pmul(ptmp,internal::ploadu(&A[j+2*PacketSize+iN]))));
|
||||
// internal::pstore(&X[j+3*PacketSize], internal::padd(internal::pload(&X[j+3*PacketSize]), internal::pmul(ptmp,internal::ploadu(&A[j+3*PacketSize+iN]))));
|
||||
// internal::pstore(&X[j+4*PacketSize], internal::padd(internal::pload(&X[j+4*PacketSize]), internal::pmul(ptmp,internal::ploadu(&A[j+4*PacketSize+iN]))));
|
||||
// internal::pstore(&X[j+5*PacketSize], internal::padd(internal::pload(&X[j+5*PacketSize]), internal::pmul(ptmp,internal::ploadu(&A[j+5*PacketSize+iN]))));
|
||||
// internal::pstore(&X[j+6*PacketSize], internal::padd(internal::pload(&X[j+6*PacketSize]), internal::pmul(ptmp,internal::ploadu(&A[j+6*PacketSize+iN]))));
|
||||
// internal::pstore(&X[j+7*PacketSize], internal::padd(internal::pload(&X[j+7*PacketSize]), internal::pmul(ptmp,internal::ploadu(&A[j+7*PacketSize+iN]))));
|
||||
// }
|
||||
// for (int j = ANP;j<AN;j+=PacketSize)
|
||||
// internal::pstore(&X[j], internal::padd(internal::pload(&X[j]), internal::pmul(ptmp,internal::ploadu(&A[j+iN]))));
|
||||
// #else
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// internal::pstore(&X[j], internal::padd(internal::pload(&X[j]), internal::pmul(ptmp,internal::ploadu(&A[j+iN]))));
|
||||
// #endif
|
||||
// }
|
||||
// }
|
||||
// // process remaining scalars
|
||||
// for (int j=AN;j<N;j++)
|
||||
// X[j] += tmp * A[j+iN];
|
||||
// }
|
||||
// asm("#end matrix_vector_product");
|
||||
// }
|
||||
|
||||
static inline void atv_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
{
|
||||
int AN = (N/PacketSize)*PacketSize;
|
||||
int bound = (N/4)*4;
|
||||
for (int i=0;i<bound;i+=4)
|
||||
{
|
||||
real tmp0 = 0;
|
||||
Packet ptmp0 = internal::pset1(real(0));
|
||||
real tmp1 = 0;
|
||||
Packet ptmp1 = internal::pset1(real(0));
|
||||
real tmp2 = 0;
|
||||
Packet ptmp2 = internal::pset1(real(0));
|
||||
real tmp3 = 0;
|
||||
Packet ptmp3 = internal::pset1(real(0));
|
||||
int iN0 = i*N;
|
||||
int iN1 = (i+1)*N;
|
||||
int iN2 = (i+2)*N;
|
||||
int iN3 = (i+3)*N;
|
||||
if (AN>0)
|
||||
{
|
||||
int align1 = (iN1 % PacketSize);
|
||||
if (align1==0)
|
||||
{
|
||||
for (int j = 0;j<AN;j+=PacketSize)
|
||||
{
|
||||
Packet b = internal::pload(&B[j]);
|
||||
ptmp0 = internal::padd(ptmp0, internal::pmul(b, internal::pload(&A[j+iN0])));
|
||||
ptmp1 = internal::padd(ptmp1, internal::pmul(b, internal::pload(&A[j+iN1])));
|
||||
ptmp2 = internal::padd(ptmp2, internal::pmul(b, internal::pload(&A[j+iN2])));
|
||||
ptmp3 = internal::padd(ptmp3, internal::pmul(b, internal::pload(&A[j+iN3])));
|
||||
}
|
||||
}
|
||||
else if (align1==2)
|
||||
{
|
||||
for (int j = 0;j<AN;j+=PacketSize)
|
||||
{
|
||||
Packet b = internal::pload(&B[j]);
|
||||
ptmp0 = internal::padd(ptmp0, internal::pmul(b, internal::pload(&A[j+iN0])));
|
||||
ptmp1 = internal::padd(ptmp1, internal::pmul(b, internal::ploadu(&A[j+iN1])));
|
||||
ptmp2 = internal::padd(ptmp2, internal::pmul(b, internal::pload(&A[j+iN2])));
|
||||
ptmp3 = internal::padd(ptmp3, internal::pmul(b, internal::ploadu(&A[j+iN3])));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int j = 0;j<AN;j+=PacketSize)
|
||||
{
|
||||
Packet b = internal::pload(&B[j]);
|
||||
ptmp0 = internal::padd(ptmp0, internal::pmul(b, internal::pload(&A[j+iN0])));
|
||||
ptmp1 = internal::padd(ptmp1, internal::pmul(b, internal::ploadu(&A[j+iN1])));
|
||||
ptmp2 = internal::padd(ptmp2, internal::pmul(b, internal::ploadu(&A[j+iN2])));
|
||||
ptmp3 = internal::padd(ptmp3, internal::pmul(b, internal::ploadu(&A[j+iN3])));
|
||||
}
|
||||
}
|
||||
tmp0 = internal::predux(ptmp0);
|
||||
tmp1 = internal::predux(ptmp1);
|
||||
tmp2 = internal::predux(ptmp2);
|
||||
tmp3 = internal::predux(ptmp3);
|
||||
}
|
||||
// process remaining scalars
|
||||
for (int j=AN;j<N;j++)
|
||||
{
|
||||
tmp0 += B[j] * A[j+iN0];
|
||||
tmp1 += B[j] * A[j+iN1];
|
||||
tmp2 += B[j] * A[j+iN2];
|
||||
tmp3 += B[j] * A[j+iN3];
|
||||
}
|
||||
X[i+0] = tmp0;
|
||||
X[i+1] = tmp1;
|
||||
X[i+2] = tmp2;
|
||||
X[i+3] = tmp3;
|
||||
}
|
||||
|
||||
for (int i=bound;i<N;i++)
|
||||
{
|
||||
real tmp0 = 0;
|
||||
Packet ptmp0 = internal::pset1(real(0));
|
||||
int iN0 = i*N;
|
||||
if (AN>0)
|
||||
{
|
||||
if (iN0 % PacketSize==0)
|
||||
for (int j = 0;j<AN;j+=PacketSize)
|
||||
ptmp0 = internal::padd(ptmp0, internal::pmul(internal::pload(&B[j]), internal::pload(&A[j+iN0])));
|
||||
else
|
||||
for (int j = 0;j<AN;j+=PacketSize)
|
||||
ptmp0 = internal::padd(ptmp0, internal::pmul(internal::pload(&B[j]), internal::ploadu(&A[j+iN0])));
|
||||
tmp0 = internal::predux(ptmp0);
|
||||
}
|
||||
// process remaining scalars
|
||||
for (int j=AN;j<N;j++)
|
||||
tmp0 += B[j] * A[j+iN0];
|
||||
X[i+0] = tmp0;
|
||||
}
|
||||
}
|
||||
|
||||
// static inline void atv_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
|
||||
// {
|
||||
// int AN = (N/PacketSize)*PacketSize;
|
||||
// for (int i=0;i<N;i++)
|
||||
// X[i] = 0;
|
||||
// for (int i=0;i<N;i++)
|
||||
// {
|
||||
// real tmp = 0;
|
||||
// Packet ptmp = internal::pset1(real(0));
|
||||
// int iN = i*N;
|
||||
// if (AN>0)
|
||||
// {
|
||||
// bool aligned = (iN % PacketSize) == 0;
|
||||
// if (aligned)
|
||||
// {
|
||||
// #ifdef PEELING
|
||||
// int ANP = (AN/(8*PacketSize))*8*PacketSize;
|
||||
// for (int j = 0;j<ANP;j+=PacketSize*8)
|
||||
// {
|
||||
// ptmp =
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j]), internal::pload(&A[j+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+PacketSize]), internal::pload(&A[j+PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+2*PacketSize]), internal::pload(&A[j+2*PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+3*PacketSize]), internal::pload(&A[j+3*PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+4*PacketSize]), internal::pload(&A[j+4*PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+5*PacketSize]), internal::pload(&A[j+5*PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+6*PacketSize]), internal::pload(&A[j+6*PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+7*PacketSize]), internal::pload(&A[j+7*PacketSize+iN])),
|
||||
// ptmp))))))));
|
||||
// }
|
||||
// for (int j = ANP;j<AN;j+=PacketSize)
|
||||
// ptmp = internal::padd(ptmp, internal::pmul(internal::pload(&B[j]), internal::pload(&A[j+iN])));
|
||||
// #else
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// ptmp = internal::padd(ptmp, internal::pmul(internal::pload(&B[j]), internal::pload(&A[j+iN])));
|
||||
// #endif
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// #ifdef PEELING
|
||||
// int ANP = (AN/(8*PacketSize))*8*PacketSize;
|
||||
// for (int j = 0;j<ANP;j+=PacketSize*8)
|
||||
// {
|
||||
// ptmp =
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j]), internal::ploadu(&A[j+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+PacketSize]), internal::ploadu(&A[j+PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+2*PacketSize]), internal::ploadu(&A[j+2*PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+3*PacketSize]), internal::ploadu(&A[j+3*PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+4*PacketSize]), internal::ploadu(&A[j+4*PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+5*PacketSize]), internal::ploadu(&A[j+5*PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+6*PacketSize]), internal::ploadu(&A[j+6*PacketSize+iN])),
|
||||
// internal::padd(internal::pmul(internal::pload(&B[j+7*PacketSize]), internal::ploadu(&A[j+7*PacketSize+iN])),
|
||||
// ptmp))))))));
|
||||
// }
|
||||
// for (int j = ANP;j<AN;j+=PacketSize)
|
||||
// ptmp = internal::padd(ptmp, internal::pmul(internal::pload(&B[j]), internal::ploadu(&A[j+iN])));
|
||||
// #else
|
||||
// for (int j = 0;j<AN;j+=PacketSize)
|
||||
// ptmp = internal::padd(ptmp, internal::pmul(internal::pload(&B[j]), internal::ploadu(&A[j+iN])));
|
||||
// #endif
|
||||
// }
|
||||
// tmp = internal::predux(ptmp);
|
||||
// }
|
||||
// // process remaining scalars
|
||||
// for (int j=AN;j<N;j++)
|
||||
// tmp += B[j] * A[j+iN];
|
||||
// X[i] = tmp;
|
||||
// }
|
||||
// }
|
||||
|
||||
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||
int AN = (N/PacketSize)*PacketSize;
|
||||
if (AN>0)
|
||||
{
|
||||
Packet pcoef = internal::pset1(coef);
|
||||
#ifdef PEELING
|
||||
const int peelSize = 3;
|
||||
int ANP = (AN/(peelSize*PacketSize))*peelSize*PacketSize;
|
||||
float* X1 = X + PacketSize;
|
||||
float* Y1 = Y + PacketSize;
|
||||
float* X2 = X + 2*PacketSize;
|
||||
float* Y2 = Y + 2*PacketSize;
|
||||
Packet x0,x1,x2,y0,y1,y2;
|
||||
for (int j = 0;j<ANP;j+=PacketSize*peelSize)
|
||||
{
|
||||
x0 = internal::pload(X+j);
|
||||
x1 = internal::pload(X1+j);
|
||||
x2 = internal::pload(X2+j);
|
||||
|
||||
y0 = internal::pload(Y+j);
|
||||
y1 = internal::pload(Y1+j);
|
||||
y2 = internal::pload(Y2+j);
|
||||
|
||||
y0 = internal::pmadd(pcoef, x0, y0);
|
||||
y1 = internal::pmadd(pcoef, x1, y1);
|
||||
y2 = internal::pmadd(pcoef, x2, y2);
|
||||
|
||||
internal::pstore(Y+j, y0);
|
||||
internal::pstore(Y1+j, y1);
|
||||
internal::pstore(Y2+j, y2);
|
||||
// internal::pstore(&Y[j+2*PacketSize], internal::padd(internal::pload(&Y[j+2*PacketSize]), internal::pmul(pcoef,internal::pload(&X[j+2*PacketSize]))));
|
||||
// internal::pstore(&Y[j+3*PacketSize], internal::padd(internal::pload(&Y[j+3*PacketSize]), internal::pmul(pcoef,internal::pload(&X[j+3*PacketSize]))));
|
||||
// internal::pstore(&Y[j+4*PacketSize], internal::padd(internal::pload(&Y[j+4*PacketSize]), internal::pmul(pcoef,internal::pload(&X[j+4*PacketSize]))));
|
||||
// internal::pstore(&Y[j+5*PacketSize], internal::padd(internal::pload(&Y[j+5*PacketSize]), internal::pmul(pcoef,internal::pload(&X[j+5*PacketSize]))));
|
||||
// internal::pstore(&Y[j+6*PacketSize], internal::padd(internal::pload(&Y[j+6*PacketSize]), internal::pmul(pcoef,internal::pload(&X[j+6*PacketSize]))));
|
||||
// internal::pstore(&Y[j+7*PacketSize], internal::padd(internal::pload(&Y[j+7*PacketSize]), internal::pmul(pcoef,internal::pload(&X[j+7*PacketSize]))));
|
||||
}
|
||||
for (int j = ANP;j<AN;j+=PacketSize)
|
||||
internal::pstore(&Y[j], internal::padd(internal::pload(&Y[j]), internal::pmul(pcoef,internal::pload(&X[j]))));
|
||||
#else
|
||||
for (int j = 0;j<AN;j+=PacketSize)
|
||||
internal::pstore(&Y[j], internal::padd(internal::pload(&Y[j]), internal::pmul(pcoef,internal::pload(&X[j]))));
|
||||
#endif
|
||||
}
|
||||
// process remaining scalars
|
||||
for (int i=AN;i<N;i++)
|
||||
Y[i] += coef * X[i];
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@ -1,50 +0,0 @@
|
||||
//=====================================================
|
||||
// File : main.cpp
|
||||
// Author : L. Plagne <laurent.plagne@edf.fr)>
|
||||
// Copyright (C) EDF R&D, lun sep 30 14:23:23 CEST 2002
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#include "utilities.h"
|
||||
#include "bench.hh"
|
||||
#include "hand_vec_interface.hh"
|
||||
#include "action_matrix_vector_product.hh"
|
||||
#include "action_atv_product.hh"
|
||||
#include "action_matrix_matrix_product.hh"
|
||||
#include "action_axpy.hh"
|
||||
#include "action_ata_product.hh"
|
||||
#include "action_aat_product.hh"
|
||||
#include "basic_actions.hh"
|
||||
//#include "action_lu_solve.hh"
|
||||
// #include "timers/mixed_perf_analyzer.hh"
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
bench<Action_matrix_vector_product<hand_vec_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||
bench<Action_atv_product<hand_vec_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||
// bench<Action_matrix_matrix_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
// bench<Action_aat_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
// bench<Action_ata_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_symv<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_axpy<hand_vec_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user