#include #include extern "C" { #include #include void sgemm_kernel(int actual_mc, int cols, int actual_kc, float alpha, float* blockA, float* blockB, float* res, int resStride); void sgemm_otcopy(int actual_kc, int cols, const float* rhs, int rhsStride, float* blockB); void sgemm_oncopy(int actual_kc, int cols, const float* rhs, int rhsStride, float* blockB); void sgemm_itcopy(int actual_kc, int cols, const float* rhs, int rhsStride, float* blockB); void sgemm_incopy(int actual_kc, int cols, const float* rhs, int rhsStride, float* blockB); } using namespace std; using namespace Eigen; #ifndef SCALAR #define SCALAR float #endif typedef SCALAR Scalar; typedef Matrix M; static float fone = 1; static float fzero = 0; static double done = 1; static double szero = 0; static char notrans = 'N'; static char trans = 'T'; static char nonunit = 'N'; static char lower = 'L'; static char right = 'R'; static int intone = 1; void blas_gemm(const MatrixXf& a, const MatrixXf& b, MatrixXf& c) { int M = c.rows(); int N = c.cols(); int K = a.cols(); int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows(); // c.noalias() += a * b; sgemm_(¬rans,¬rans,&M,&N,&K,&fone, const_cast(a.data()),&lda, const_cast(b.data()),&ldb,&fone, c.data(),&ldc); } void blas_gemm(const MatrixXd& a, const MatrixXd& b, MatrixXd& c) { int M = c.rows(); int N = c.cols(); int K = a.cols(); int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows(); // c.noalias() += a * b; dgemm_(¬rans,¬rans,&M,&N,&K,&done, const_cast(a.data()),&lda, const_cast(b.data()),&ldb,&done, c.data(),&ldc); } int main(int argc, char **argv) { int rep = 1; int s = 2048; int m = s; int n = s; int p = s; const int N = 1; M a[N]; M b[N]; M c[N]; for (int k=0; k