BTL: clean the BLAS implementation

This commit is contained in:
Gael Guennebaud 2011-03-23 10:35:54 +01:00
parent e35b1ef3f3
commit f9da1ccc3b
10 changed files with 383 additions and 1031 deletions

View File

@ -90,7 +90,7 @@ endmacro(btl_add_target_property)
ENABLE_TESTING()
add_subdirectory(libs/eigen3)
add_subdirectory(libs/C_BLAS)
add_subdirectory(libs/BLAS)
add_subdirectory(libs/ublas)
add_subdirectory(libs/gmm)
add_subdirectory(libs/mtl4)

View File

@ -0,0 +1,83 @@
//=====================================================
// File : blas_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef blas_PRODUIT_MATRICE_VECTEUR_HH
#define blas_PRODUIT_MATRICE_VECTEUR_HH
#include <c_interface_base.h>
#include <complex>
extern "C"
{
#include "blas.h"
// Cholesky Factorization
// void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info);
// void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work, int *lwork, int *info );
void dsytrd_(char *uplo, const int *n, double *a, const int *lda, double *d, double *e, double *tau, double *work, int *lwork, int *info );
void sgehrd_( const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork, int *info );
void dgehrd_( const int *n, int *ilo, int *ihi, double *a, const int *lda, double *tau, double *work, int *lwork, int *info );
// LU row pivoting
// void dgetrf_( int *m, int *n, double *a, int *lda, int *ipiv, int *info );
// void sgetrf_(const int* m, const int* n, float *a, const int* ld, int* ipivot, int* info);
// LU full pivoting
void sgetc2_(const int* n, float *a, const int *lda, int *ipiv, int *jpiv, int*info );
void dgetc2_(const int* n, double *a, const int *lda, int *ipiv, int *jpiv, int*info );
#ifdef HAS_LAPACK
#endif
}
#define MAKE_STRING2(S) #S
#define MAKE_STRING(S) MAKE_STRING2(S)
#define CAT2(A,B) A##B
#define CAT(A,B) CAT2(A,B)
template<class real> class blas_interface;
static char notrans = 'N';
static char trans = 'T';
static char nonunit = 'N';
static char lower = 'L';
static char right = 'R';
static char left = 'L';
static int intone = 1;
#define SCALAR float
#define SCALAR_PREFIX s
#include "blas_interface_impl.hh"
#undef SCALAR
#undef SCALAR_PREFIX
#define SCALAR double
#define SCALAR_PREFIX d
#include "blas_interface_impl.hh"
#undef SCALAR
#undef SCALAR_PREFIX
#endif

View File

@ -0,0 +1,153 @@
#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_)
template<> class blas_interface<SCALAR> : public c_interface_base<SCALAR>
{
public :
static SCALAR fone;
static SCALAR fzero;
static inline std::string name()
{
return MAKE_STRING(CBLASNAME);
}
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
BLAS_FUNC(gemv)(&notrans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
}
static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
BLAS_FUNC(symv)(&lower, &N,&fone,A,&N,B,&intone,&fzero,X,&intone);
}
static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
BLAS_FUNC(syr2)(&lower,&N,&fone,B,&intone,X,&intone,A,&N);
}
static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){
BLAS_FUNC(ger)(&N,&N,&fone,X,&intone,Y,&intone,A,&N);
}
static inline void rot(gene_vector & A, gene_vector & B, SCALAR c, SCALAR s, int N){
BLAS_FUNC(rot)(&N,A,&intone,B,&intone,&c,&s);
}
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
BLAS_FUNC(gemv)(&trans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
}
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
}
static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
}
// static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
// ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N);
// }
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
BLAS_FUNC(syrk)(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N);
}
static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){
BLAS_FUNC(axpy)(&N,&coef,X,&intone,Y,&intone);
}
static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){
BLAS_FUNC(scal)(&N,&b,Y,&intone);
BLAS_FUNC(axpy)(&N,&a,X,&intone,Y,&intone);
}
static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
int N2 = N*N;
BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
char uplo = 'L';
int info = 0;
BLAS_FUNC(potrf)(&uplo, &N, C, &N, &info);
if(info!=0) std::cerr << "potrf_ error " << info << "\n";
}
static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
int N2 = N*N;
BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
char uplo = 'L';
int info = 0;
int * ipiv = (int*)alloca(sizeof(int)*N);
BLAS_FUNC(getrf)(&N, &N, C, &N, ipiv, &info);
if(info!=0) std::cerr << "getrf_ error " << info << "\n";
}
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
BLAS_FUNC(trsv)(&lower, &notrans, &nonunit, &N, L, &N, X, &intone);
}
static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){
BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
BLAS_FUNC(trsm)(&right, &lower, &notrans, &nonunit, &N, &N, &fone, L, &N, X, &N);
}
static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
BLAS_FUNC(trmm)(&left, &lower, &notrans,&nonunit, &N,&N,&fone,A,&N,B,&N);
}
#ifdef HAS_LAPACK
static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
int N2 = N*N;
BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
char uplo = 'L';
int info = 0;
int * ipiv = (int*)alloca(sizeof(int)*N);
int * jpiv = (int*)alloca(sizeof(int)*N);
BLAS_FUNC(getc2)(&N, C, &N, ipiv, jpiv, &info);
}
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
{
int N2 = N*N;
int inc = 1;
BLAS_FUNC(copy)(&N2, X, &inc, C, &inc);
}
int info = 0;
int ilo = 1;
int ihi = N;
int bsize = 64;
int worksize = N*bsize;
SCALAR* d = new SCALAR[N+worksize];
BLAS_FUNC(gehrd)(&N, &ilo, &ihi, C, &N, d, d+N, &worksize, &info);
delete[] d;
}
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
{
int N2 = N*N;
int inc = 1;
BLAS_FUNC(copy)(&N2, X, &inc, C, &inc);
}
char uplo = 'U';
int info = 0;
int ilo = 1;
int ihi = N;
int bsize = 64;
int worksize = N*bsize;
SCALAR* d = new SCALAR[3*N+worksize];
BLAS_FUNC(sytrd)(&uplo, &N, C, &N, d, d+N, d+2*N, d+3*N, &worksize, &info);
delete[] d;
}
#endif // HAS_LAPACK
};
SCALAR blas_interface<SCALAR>::fone = SCALAR(1);
SCALAR blas_interface<SCALAR>::fzero = SCALAR(0);

View File

@ -0,0 +1,73 @@
#ifndef BTL_C_INTERFACE_BASE_H
#define BTL_C_INTERFACE_BASE_H
#include "utilities.h"
#include <vector>
template<class real> class c_interface_base
{
public:
typedef real real_type;
typedef std::vector<real> stl_vector;
typedef std::vector<stl_vector > stl_matrix;
typedef real* gene_matrix;
typedef real* gene_vector;
static void free_matrix(gene_matrix & A, int N){
delete A;
}
static void free_vector(gene_vector & B){
delete B;
}
static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
int N = A_stl.size();
A = new real[N*N];
for (int j=0;j<N;j++)
for (int i=0;i<N;i++)
A[i+N*j] = A_stl[j][i];
}
static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
int N = B_stl.size();
B = new real[N];
for (int i=0;i<N;i++)
B[i] = B_stl[i];
}
static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
int N = B_stl.size();
for (int i=0;i<N;i++)
B_stl[i] = B[i];
}
static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
int N = A_stl.size();
for (int j=0;j<N;j++){
A_stl[j].resize(N);
for (int i=0;i<N;i++)
A_stl[j][i] = A[i+N*j];
}
}
static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
for (int i=0;i<N;i++)
cible[i]=source[i];
}
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
for (int j=0;j<N;j++){
for (int i=0;i<N;i++){
cible[i+N*j] = source[i+N*j];
}
}
}
};
#endif

View File

@ -0,0 +1,73 @@
//=====================================================
// File : main.cpp
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include "blas_interface.hh"
#include "bench.hh"
#include "basic_actions.hh"
#include "action_cholesky.hh"
#include "action_lu_decomp.hh"
#include "action_partial_lu.hh"
#include "action_trisolve_matrix.hh"
#ifdef HAS_LAPACK
#include "action_hessenberg.hh"
#endif
BTL_MAIN;
int main()
{
bench<Action_axpy<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_symv<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_syr2<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_ger<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve_matrix<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trmm<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_cholesky<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_partial_lu<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
#ifdef HAS_LAPACK
bench<Action_lu_decomp<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_hessenberg<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_tridiagonalization<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
#endif
//bench<Action_lu_solve<blas_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
return 0;
}

View File

@ -1,361 +0,0 @@
//=====================================================
// File : C_BLAS_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef C_BLAS_PRODUIT_MATRICE_VECTEUR_HH
#define C_BLAS_PRODUIT_MATRICE_VECTEUR_HH
#include "f77_interface.hh"
#include <complex>
extern "C"
{
#include "cblas.h"
// #ifdef PUREBLAS
#include "blas.h"
// #endif
// void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k,
// const float *alpha, const float *a, const int *lda, const float *b, const int *ldb,
// const float *beta, float *c, const int *ldc);
//
// void sgemv_(const char *trans, const int *m, const int *n, const float *alpha,
// const float *a, const int *lda, const float *x, const int *incx,
// const float *beta, float *y, const int *incy);
//
// void ssymv_(const char *trans, const char* uplo,
// const int* N, const float* alpha, const float *A,
// const int* lda, const float *X, const int* incX,
// const float* beta, float *Y, const int* incY);
//
// void sscal_(const int *n, const float *alpha, const float *x, const int *incx);
//
// void saxpy_(const int *n, const float *alpha, const float *x, const int *incx,
// float *y, const int *incy);
//
// void strsv_(const char *uplo, const char *trans, const char *diag, const int *n,
// const float *a, const int *lda, float *x, const int *incx);
//
// void scopy_(const int *n, const float *x, const int *incx, float *y, const int *incy);
// Cholesky Factorization
// #include "mkl_lapack.h"
// void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info);
// void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work, int *lwork, int *info );
void sgehrd_( const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork, int *info );
// LU row pivoting
// void dgetrf_( int *m, int *n, double *a, int *lda, int *ipiv, int *info );
// void sgetrf_(const int* m, const int* n, float *a, const int* ld, int* ipivot, int* info);
// LU full pivoting
void sgetc2_(const int* n, float *a, const int *lda, int *ipiv, int *jpiv, int*info );
#ifdef HAS_LAPACK
#endif
}
#define MAKE_STRING2(S) #S
#define MAKE_STRING(S) MAKE_STRING2(S)
template<class real>
class C_BLAS_interface : public f77_interface_base<real>
{
public :
typedef typename f77_interface_base<real>::gene_matrix gene_matrix;
typedef typename f77_interface_base<real>::gene_vector gene_vector;
static inline std::string name( void )
{
return MAKE_STRING(CBLASNAME);
}
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
{
cblas_dgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
}
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
{
cblas_dgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
}
static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
{
cblas_dsymv(CblasColMajor,CblasLower,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
}
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
}
static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
cblas_dgemm(CblasColMajor,CblasTrans,CblasTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
}
// static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
// cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
// }
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
//cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
cblas_dsyrk(CblasColMajor, CblasLower, CblasTrans, N, N, 1.0, A, N, 0.0, X, N);
}
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
cblas_daxpy(N,coef,X,1,Y,1);
}
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
cblas_dscal(N,b,Y,1);
cblas_daxpy(N,a,X,1,Y,1);
}
};
static float fone = 1;
static float fzero = 0;
static char notrans = 'N';
static char trans = 'T';
static char nonunit = 'N';
static char lower = 'L';
static char right = 'R';
static char left = 'L';
static int intone = 1;
template<>
class C_BLAS_interface<float> : public f77_interface_base<float>
{
public :
static inline std::string name( void )
{
return MAKE_STRING(CBLASNAME);
}
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
#ifdef PUREBLAS
sgemv_(&notrans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
#else
cblas_sgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
#endif
}
static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
#ifdef PUREBLAS
ssymv_(&lower, &N,&fone,A,&N,B,&intone,&fzero,X,&intone);
#else
cblas_ssymv(CblasColMajor,CblasLower,N,1.0,A,N,B,1,0.0,X,1);
#endif
}
static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
#ifdef PUREBLAS
ssyr2_(&lower,&N,&fone,B,&intone,X,&intone,A,&N);
#else
cblas_ssyr2(CblasColMajor,CblasLower,N,1.0,B,1,X,1,A,N);
#endif
}
static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){
#ifdef PUREBLAS
sger_(&N,&N,&fone,X,&intone,Y,&intone,A,&N);
#else
cblas_sger(CblasColMajor,N,N,1.0,X,1,Y,1,A,N);
#endif
}
static inline void rot(gene_vector & A, gene_vector & B, float c, float s, int N){
#ifdef PUREBLAS
srot_(&N,A,&intone,B,&intone,&c,&s);
#else
cblas_srot(N,A,1,B,1,c,s);
#endif
}
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
#ifdef PUREBLAS
sgemv_(&trans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
#else
cblas_sgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
#endif
}
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
#ifdef PUREBLAS
sgemm_(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
#else
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
#endif
}
static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
#ifdef PUREBLAS
sgemm_(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
#else
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
#endif
}
// static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
// #ifdef PUREBLAS
// sgemm_(&trans,&notrans,&N,&N,&N,&fone,A,&N,A,&N,&fzero,X,&N);
// #else
// cblas_sgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
// #endif
// }
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
#ifdef PUREBLAS
// sgemm_(&notrans,&trans,&N,&N,&N,&fone,A,&N,A,&N,&fzero,X,&N);
ssyrk_(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N);
#else
// cblas_sgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
cblas_ssyrk(CblasColMajor, CblasLower, CblasNoTrans, N, N, 1.0, A, N, 0.0, X, N);
#endif
}
static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N){
#ifdef PUREBLAS
saxpy_(&N,&coef,X,&intone,Y,&intone);
#else
cblas_saxpy(N,coef,X,1,Y,1);
#endif
}
static inline void axpby(float a, const gene_vector & X, float b, gene_vector & Y, int N){
#ifdef PUREBLAS
sscal_(&N,&b,Y,&intone);
saxpy_(&N,&a,X,&intone,Y,&intone);
#else
cblas_sscal(N,b,Y,1);
cblas_saxpy(N,a,X,1,Y,1);
#endif
}
static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
int N2 = N*N;
scopy_(&N2, X, &intone, C, &intone);
char uplo = 'L';
int info = 0;
spotrf_(&uplo, &N, C, &N, &info);
if(info!=0) std::cerr << "spotrf_ error " << info << "\n";
}
static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
int N2 = N*N;
scopy_(&N2, X, &intone, C, &intone);
char uplo = 'L';
int info = 0;
int * ipiv = (int*)alloca(sizeof(int)*N);
sgetrf_(&N, &N, C, &N, ipiv, &info);
if(info!=0) std::cerr << "sgetrf_ error " << info << "\n";
}
#ifdef HAS_LAPACK
static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
int N2 = N*N;
scopy_(&N2, X, &intone, C, &intone);
char uplo = 'L';
int info = 0;
int * ipiv = (int*)alloca(sizeof(int)*N);
int * jpiv = (int*)alloca(sizeof(int)*N);
sgetc2_(&N, C, &N, ipiv, jpiv, &info);
}
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
#ifdef PUREBLAS
{
int N2 = N*N;
int inc = 1;
scopy_(&N2, X, &inc, C, &inc);
}
#else
cblas_scopy(N*N, X, 1, C, 1);
#endif
int info = 0;
int ilo = 1;
int ihi = N;
int bsize = 64;
int worksize = N*bsize;
float* d = new float[N+worksize];
sgehrd_(&N, &ilo, &ihi, C, &N, d, d+N, &worksize, &info);
delete[] d;
}
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
#ifdef PUREBLAS
{
int N2 = N*N;
int inc = 1;
scopy_(&N2, X, &inc, C, &inc);
}
#else
cblas_scopy(N*N, X, 1, C, 1);
#endif
char uplo = 'U';
int info = 0;
int ilo = 1;
int ihi = N;
int bsize = 64;
int worksize = N*bsize;
float* d = new float[3*N+worksize];
ssytrd_(&uplo, &N, C, &N, d, d+N, d+2*N, d+3*N, &worksize, &info);
delete[] d;
}
#endif
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
#ifdef PUREBLAS
scopy_(&N, B, &intone, X, &intone);
strsv_(&lower, &notrans, &nonunit, &N, L, &N, X, &intone);
#else
cblas_scopy(N, B, 1, X, 1);
cblas_strsv(CblasColMajor, CblasLower, CblasNoTrans, CblasNonUnit, N, L, N, X, 1);
#endif
}
static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){
#ifdef PUREBLAS
scopy_(&N, B, &intone, X, &intone);
strsm_(&right, &lower, &notrans, &nonunit, &N, &N, &fone, L, &N, X, &N);
#else
cblas_scopy(N, B, 1, X, 1);
cblas_strsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, 1, L, N, X, N);
#endif
}
static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
#ifdef PUREBLAS
strmm_(&left, &lower, &notrans,&nonunit, &N,&N,&fone,A,&N,B,&N);
#else
cblas_strmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans,CblasNonUnit, N,N,1,A,N,B,N);
#endif
}
};
#endif

View File

@ -1,596 +0,0 @@
#ifndef CBLAS_H
#ifndef CBLAS_ENUM_DEFINED_H
#define CBLAS_ENUM_DEFINED_H
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102 };
enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113,
AtlasConj=114};
enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
#endif
#ifndef CBLAS_ENUM_ONLY
#define CBLAS_H
#define CBLAS_INDEX int
int cblas_errprn(int ierr, int info, char *form, ...);
/*
* ===========================================================================
* Prototypes for level 1 BLAS functions (complex are recast as routines)
* ===========================================================================
*/
float cblas_sdsdot(const int N, const float alpha, const float *X,
const int incX, const float *Y, const int incY);
double cblas_dsdot(const int N, const float *X, const int incX, const float *Y,
const int incY);
float cblas_sdot(const int N, const float *X, const int incX,
const float *Y, const int incY);
double cblas_ddot(const int N, const double *X, const int incX,
const double *Y, const int incY);
/*
* Functions having prefixes Z and C only
*/
void cblas_cdotu_sub(const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotu);
void cblas_cdotc_sub(const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotc);
void cblas_zdotu_sub(const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotu);
void cblas_zdotc_sub(const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotc);
/*
* Functions having prefixes S D SC DZ
*/
float cblas_snrm2(const int N, const float *X, const int incX);
float cblas_sasum(const int N, const float *X, const int incX);
double cblas_dnrm2(const int N, const double *X, const int incX);
double cblas_dasum(const int N, const double *X, const int incX);
float cblas_scnrm2(const int N, const void *X, const int incX);
float cblas_scasum(const int N, const void *X, const int incX);
double cblas_dznrm2(const int N, const void *X, const int incX);
double cblas_dzasum(const int N, const void *X, const int incX);
/*
* Functions having standard 4 prefixes (S D C Z)
*/
CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX);
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX);
CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX);
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX);
/*
* ===========================================================================
* Prototypes for level 1 BLAS routines
* ===========================================================================
*/
/*
* Routines with standard 4 prefixes (s, d, c, z)
*/
void cblas_sswap(const int N, float *X, const int incX,
float *Y, const int incY);
void cblas_scopy(const int N, const float *X, const int incX,
float *Y, const int incY);
void cblas_saxpy(const int N, const float alpha, const float *X,
const int incX, float *Y, const int incY);
void catlas_saxpby(const int N, const float alpha, const float *X,
const int incX, const float beta, float *Y, const int incY);
void catlas_sset
(const int N, const float alpha, float *X, const int incX);
void cblas_dswap(const int N, double *X, const int incX,
double *Y, const int incY);
void cblas_dcopy(const int N, const double *X, const int incX,
double *Y, const int incY);
void cblas_daxpy(const int N, const double alpha, const double *X,
const int incX, double *Y, const int incY);
void catlas_daxpby(const int N, const double alpha, const double *X,
const int incX, const double beta, double *Y, const int incY);
void catlas_dset
(const int N, const double alpha, double *X, const int incX);
void cblas_cswap(const int N, void *X, const int incX,
void *Y, const int incY);
void cblas_ccopy(const int N, const void *X, const int incX,
void *Y, const int incY);
void cblas_caxpy(const int N, const void *alpha, const void *X,
const int incX, void *Y, const int incY);
void catlas_caxpby(const int N, const void *alpha, const void *X,
const int incX, const void *beta, void *Y, const int incY);
void catlas_cset
(const int N, const void *alpha, void *X, const int incX);
void cblas_zswap(const int N, void *X, const int incX,
void *Y, const int incY);
void cblas_zcopy(const int N, const void *X, const int incX,
void *Y, const int incY);
void cblas_zaxpy(const int N, const void *alpha, const void *X,
const int incX, void *Y, const int incY);
void catlas_zaxpby(const int N, const void *alpha, const void *X,
const int incX, const void *beta, void *Y, const int incY);
void catlas_zset
(const int N, const void *alpha, void *X, const int incX);
/*
* Routines with S and D prefix only
*/
void cblas_srotg(float *a, float *b, float *c, float *s);
void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P);
void cblas_srot(const int N, float *X, const int incX,
float *Y, const int incY, const float c, const float s);
void cblas_srotm(const int N, float *X, const int incX,
float *Y, const int incY, const float *P);
void cblas_drotg(double *a, double *b, double *c, double *s);
void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P);
void cblas_drot(const int N, double *X, const int incX,
double *Y, const int incY, const double c, const double s);
void cblas_drotm(const int N, double *X, const int incX,
double *Y, const int incY, const double *P);
/*
* Routines with S D C Z CS and ZD prefixes
*/
void cblas_sscal(const int N, const float alpha, float *X, const int incX);
void cblas_dscal(const int N, const double alpha, double *X, const int incX);
void cblas_cscal(const int N, const void *alpha, void *X, const int incX);
void cblas_zscal(const int N, const void *alpha, void *X, const int incX);
void cblas_csscal(const int N, const float alpha, void *X, const int incX);
void cblas_zdscal(const int N, const double alpha, void *X, const int incX);
/*
* Extra reference routines provided by ATLAS, but not mandated by the standard
*/
void cblas_crotg(void *a, void *b, void *c, void *s);
void cblas_zrotg(void *a, void *b, void *c, void *s);
void cblas_csrot(const int N, void *X, const int incX, void *Y, const int incY,
const float c, const float s);
void cblas_zdrot(const int N, void *X, const int incX, void *Y, const int incY,
const double c, const double s);
/*
* ===========================================================================
* Prototypes for level 2 BLAS
* ===========================================================================
*/
/*
* Routines with standard 4 prefixes (S, D, C, Z)
*/
void cblas_sgemv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const float alpha, const float *A, const int lda,
const float *X, const int incX, const float beta,
float *Y, const int incY);
void cblas_sgbmv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const int KL, const int KU, const float alpha,
const float *A, const int lda, const float *X,
const int incX, const float beta, float *Y, const int incY);
void cblas_strmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const float *A, const int lda,
float *X, const int incX);
void cblas_stbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const int K, const float *A, const int lda,
float *X, const int incX);
void cblas_stpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const float *Ap, float *X, const int incX);
void cblas_strsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const float *A, const int lda, float *X,
const int incX);
void cblas_stbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const int K, const float *A, const int lda,
float *X, const int incX);
void cblas_stpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const float *Ap, float *X, const int incX);
void cblas_dgemv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const double alpha, const double *A, const int lda,
const double *X, const int incX, const double beta,
double *Y, const int incY);
void cblas_dgbmv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const int KL, const int KU, const double alpha,
const double *A, const int lda, const double *X,
const int incX, const double beta, double *Y, const int incY);
void cblas_dtrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const double *A, const int lda,
double *X, const int incX);
void cblas_dtbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const int K, const double *A, const int lda,
double *X, const int incX);
void cblas_dtpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const double *Ap, double *X, const int incX);
void cblas_dtrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const double *A, const int lda, double *X,
const int incX);
void cblas_dtbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const int K, const double *A, const int lda,
double *X, const int incX);
void cblas_dtpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const double *Ap, double *X, const int incX);
void cblas_cgemv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *X, const int incX, const void *beta,
void *Y, const int incY);
void cblas_cgbmv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const int KL, const int KU, const void *alpha,
const void *A, const int lda, const void *X,
const int incX, const void *beta, void *Y, const int incY);
void cblas_ctrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const void *A, const int lda,
void *X, const int incX);
void cblas_ctbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const int K, const void *A, const int lda,
void *X, const int incX);
void cblas_ctpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const void *Ap, void *X, const int incX);
void cblas_ctrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const void *A, const int lda, void *X,
const int incX);
void cblas_ctbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const int K, const void *A, const int lda,
void *X, const int incX);
void cblas_ctpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const void *Ap, void *X, const int incX);
void cblas_zgemv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *X, const int incX, const void *beta,
void *Y, const int incY);
void cblas_zgbmv(const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const int KL, const int KU, const void *alpha,
const void *A, const int lda, const void *X,
const int incX, const void *beta, void *Y, const int incY);
void cblas_ztrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const void *A, const int lda,
void *X, const int incX);
void cblas_ztbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const int K, const void *A, const int lda,
void *X, const int incX);
void cblas_ztpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const void *Ap, void *X, const int incX);
void cblas_ztrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const void *A, const int lda, void *X,
const int incX);
void cblas_ztbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const int K, const void *A, const int lda,
void *X, const int incX);
void cblas_ztpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const void *Ap, void *X, const int incX);
/*
* Routines with S and D prefixes only
*/
void cblas_ssymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const float alpha, const float *A,
const int lda, const float *X, const int incX,
const float beta, float *Y, const int incY);
void cblas_ssbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const int K, const float alpha, const float *A,
const int lda, const float *X, const int incX,
const float beta, float *Y, const int incY);
void cblas_sspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const float alpha, const float *Ap,
const float *X, const int incX,
const float beta, float *Y, const int incY);
void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N,
const float alpha, const float *X, const int incX,
const float *Y, const int incY, float *A, const int lda);
void cblas_ssyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const float alpha, const float *X,
const int incX, float *A, const int lda);
void cblas_sspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const float alpha, const float *X,
const int incX, float *Ap);
void cblas_ssyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const float alpha, const float *X,
const int incX, const float *Y, const int incY, float *A,
const int lda);
void cblas_sspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const float alpha, const float *X,
const int incX, const float *Y, const int incY, float *A);
void cblas_dsymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *A,
const int lda, const double *X, const int incX,
const double beta, double *Y, const int incY);
void cblas_dsbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const int K, const double alpha, const double *A,
const int lda, const double *X, const int incX,
const double beta, double *Y, const int incY);
void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *Ap,
const double *X, const int incX,
const double beta, double *Y, const int incY);
void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N,
const double alpha, const double *X, const int incX,
const double *Y, const int incY, double *A, const int lda);
void cblas_dsyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, double *A, const int lda);
void cblas_dspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, double *Ap);
void cblas_dsyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, const double *Y, const int incY, double *A,
const int lda);
void cblas_dspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, const double *Y, const int incY, double *A);
/*
* Routines with C and Z prefixes only
*/
void cblas_chemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *A,
const int lda, const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_chbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const int K, const void *alpha, const void *A,
const int lda, const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_chpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *Ap,
const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_cgeru(const enum CBLAS_ORDER Order, const int M, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_cgerc(const enum CBLAS_ORDER Order, const int M, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_cher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const float alpha, const void *X, const int incX,
void *A, const int lda);
void cblas_chpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const float alpha, const void *X,
const int incX, void *A);
void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_chpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *Ap);
void cblas_zhemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *A,
const int lda, const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_zhbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const int K, const void *alpha, const void *A,
const int lda, const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *Ap,
const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const void *X, const int incX,
void *A, const int lda);
void cblas_zhpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const void *X,
const int incX, void *A);
void cblas_zher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_zhpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *Ap);
/*
* ===========================================================================
* Prototypes for level 3 BLAS
* ===========================================================================
*/
/*
* Routines with standard 4 prefixes (S, D, C, Z)
*/
void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const float alpha, const float *A,
const int lda, const float *B, const int ldb,
const float beta, float *C, const int ldc);
void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const int M, const int N,
const float alpha, const float *A, const int lda,
const float *B, const int ldb, const float beta,
float *C, const int ldc);
void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const float alpha, const float *A, const int lda,
const float beta, float *C, const int ldc);
void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const float alpha, const float *A, const int lda,
const float *B, const int ldb, const float beta,
float *C, const int ldc);
void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_DIAG Diag, const int M, const int N,
const float alpha, const float *A, const int lda,
float *B, const int ldb);
void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_DIAG Diag, const int M, const int N,
const float alpha, const float *A, const int lda,
float *B, const int ldb);
void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const double alpha, const double *A,
const int lda, const double *B, const int ldb,
const double beta, double *C, const int ldc);
void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const int M, const int N,
const double alpha, const double *A, const int lda,
const double *B, const int ldb, const double beta,
double *C, const int ldc);
void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const double alpha, const double *A, const int lda,
const double beta, double *C, const int ldc);
void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const double alpha, const double *A, const int lda,
const double *B, const int ldb, const double beta,
double *C, const int ldc);
void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_DIAG Diag, const int M, const int N,
const double alpha, const double *A, const int lda,
double *B, const int ldb);
void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_DIAG Diag, const int M, const int N,
const double alpha, const double *A, const int lda,
double *B, const int ldb);
void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const void *alpha, const void *A,
const int lda, const void *B, const int ldb,
const void *beta, void *C, const int ldc);
void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *beta, void *C, const int ldc);
void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_DIAG Diag, const int M, const int N,
const void *alpha, const void *A, const int lda,
void *B, const int ldb);
void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_DIAG Diag, const int M, const int N,
const void *alpha, const void *A, const int lda,
void *B, const int ldb);
void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const void *alpha, const void *A,
const int lda, const void *B, const int ldb,
const void *beta, void *C, const int ldc);
void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *beta, void *C, const int ldc);
void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_DIAG Diag, const int M, const int N,
const void *alpha, const void *A, const int lda,
void *B, const int ldb);
void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_DIAG Diag, const int M, const int N,
const void *alpha, const void *A, const int lda,
void *B, const int ldb);
/*
* Routines with prefixes C and Z only
*/
void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const float alpha, const void *A, const int lda,
const float beta, void *C, const int ldc);
void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const float beta,
void *C, const int ldc);
void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const double alpha, const void *A, const int lda,
const double beta, void *C, const int ldc);
void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const double beta,
void *C, const int ldc);
int cblas_errprn(int ierr, int info, char *form, ...);
#endif /* end #ifdef CBLAS_ENUM_ONLY */
#endif

View File

@ -1,73 +0,0 @@
//=====================================================
// File : main.cpp
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include "C_BLAS_interface.hh"
#include "bench.hh"
#include "basic_actions.hh"
#include "action_cholesky.hh"
#include "action_lu_decomp.hh"
#include "action_partial_lu.hh"
#include "action_trisolve_matrix.hh"
#ifdef HAS_LAPACK
#include "action_hessenberg.hh"
#endif
BTL_MAIN;
int main()
{
bench<Action_axpy<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_symv<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_syr2<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_ger<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_rot<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve_matrix<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trmm<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_partial_lu<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
#ifdef HAS_LAPACK
bench<Action_lu_decomp<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_hessenberg<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_tridiagonalization<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
#endif
//bench<Action_lu_solve<C_BLAS_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
return 0;
}