mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Extend the monitoring benchmarks with transposed matrix-vector and triangular matrix-vector products.
This commit is contained in:
parent
e3f613cbd4
commit
445c015751
@ -25,13 +25,13 @@ before-evaluators
|
|||||||
#6742:0cbd6195e829 # merge default to tensors
|
#6742:0cbd6195e829 # merge default to tensors
|
||||||
#6747:853d2bafeb8f # Generalized the gebp apis
|
#6747:853d2bafeb8f # Generalized the gebp apis
|
||||||
6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation
|
6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation
|
||||||
#6781:9cc5a931b2c6 # generalized gemv
|
6781:9cc5a931b2c6 # generalized gemv
|
||||||
#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product
|
6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product
|
||||||
#6844:039efd86b75c # merge tensor
|
#6844:039efd86b75c # merge tensor
|
||||||
6845:7333ed40c6ef # change prefetching in gebp
|
6845:7333ed40c6ef # change prefetching in gebp
|
||||||
#6856:b5be5e10eb7f # merge index conversion
|
#6856:b5be5e10eb7f # merge index conversion
|
||||||
#6893:c3a64aba7c70 # clean blocking size computation
|
6893:c3a64aba7c70 # clean blocking size computation
|
||||||
#6898:6fb31ebe6492 # rotating kernel for ARM
|
6898:6fb31ebe6492 # rotating kernel for ARM
|
||||||
6899:877facace746 # rotating kernel for ARM only
|
6899:877facace746 # rotating kernel for ARM only
|
||||||
#6904:c250623ae9fa # result_of
|
#6904:c250623ae9fa # result_of
|
||||||
6921:915f1b1fc158 # fix prefetching change for ARM
|
6921:915f1b1fc158 # fix prefetching change for ARM
|
||||||
@ -50,7 +50,7 @@ before-evaluators
|
|||||||
7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code
|
7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code
|
||||||
7591:09a8e2186610 # 3.3-alpha1
|
7591:09a8e2186610 # 3.3-alpha1
|
||||||
7650:b0f3c8f43025 # help clang inlining
|
7650:b0f3c8f43025 # help clang inlining
|
||||||
#8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs)
|
8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs)
|
||||||
8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes
|
8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes
|
||||||
8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path
|
8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path
|
||||||
8985:d935df21a082 # Remove the rotating kernel.
|
8985:d935df21a082 # Remove the rotating kernel.
|
||||||
@ -59,6 +59,11 @@ before-evaluators
|
|||||||
9174:d228bc282ac9 # merge
|
9174:d228bc282ac9 # merge
|
||||||
9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955
|
9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955
|
||||||
9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775
|
9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775
|
||||||
3.3-beta2
|
9361:69d418c06999 # 3.3-beta2
|
||||||
3.3-rc1
|
9583:bef509908b9d # 3.3-rc1
|
||||||
3.3.0
|
9792:26667be4f70b # 3.3.0
|
||||||
|
9942:b1d3eba60130 # Operators += and -= do not resize!
|
||||||
|
9943:79bb9887afd4 # Ease compiler job to generate clean and efficient code in mat*vec
|
||||||
|
9946:2213991340ea # Complete rewrite of column-major-matrix * vector product to deliver higher performance of modern CPU.
|
||||||
|
9953:21acc0e8d782 # Improve performance of row-major-dense-matrix * vector products for recent CPUs.
|
||||||
|
|
||||||
|
@ -53,7 +53,10 @@ int main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
std::vector<double> results;
|
std::vector<double> results;
|
||||||
|
|
||||||
std::ifstream settings("gemm_settings.txt");
|
std::string filename = std::string("gemm_settings.txt");
|
||||||
|
if(argc>1)
|
||||||
|
filename = std::string(argv[1]);
|
||||||
|
std::ifstream settings(filename);
|
||||||
long m, n, k;
|
long m, n, k;
|
||||||
while(settings >> m >> n >> k)
|
while(settings >> m >> n >> k)
|
||||||
{
|
{
|
||||||
|
@ -1,18 +1,4 @@
|
|||||||
#include <iostream>
|
#include "gemv_common.h"
|
||||||
#include <fstream>
|
|
||||||
#include <vector>
|
|
||||||
#include <Eigen/Core>
|
|
||||||
#include "../../BenchTimer.h"
|
|
||||||
using namespace Eigen;
|
|
||||||
|
|
||||||
#ifndef SCALAR
|
|
||||||
#error SCALAR must be defined
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef SCALAR Scalar;
|
|
||||||
|
|
||||||
typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
|
|
||||||
typedef Matrix<Scalar,Dynamic,1> Vec;
|
|
||||||
|
|
||||||
EIGEN_DONT_INLINE
|
EIGEN_DONT_INLINE
|
||||||
void gemv(const Mat &A, const Vec &B, Vec &C)
|
void gemv(const Mat &A, const Vec &B, Vec &C)
|
||||||
@ -20,49 +6,7 @@ void gemv(const Mat &A, const Vec &B, Vec &C)
|
|||||||
C.noalias() += A * B;
|
C.noalias() += A * B;
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DONT_INLINE
|
|
||||||
double bench(long m, long n)
|
|
||||||
{
|
|
||||||
Mat A(m,n);
|
|
||||||
Vec B(n);
|
|
||||||
Vec C(m);
|
|
||||||
A.setRandom();
|
|
||||||
B.setRandom();
|
|
||||||
C.setZero();
|
|
||||||
|
|
||||||
BenchTimer t;
|
|
||||||
|
|
||||||
double up = 1e9*4/sizeof(Scalar);
|
|
||||||
double tm0 = 4, tm1 = 10;
|
|
||||||
if(NumTraits<Scalar>::IsComplex)
|
|
||||||
{
|
|
||||||
up /= 4;
|
|
||||||
tm0 = 2;
|
|
||||||
tm1 = 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
double flops = 2. * m * n;
|
|
||||||
long rep = std::max(1., std::min(100., up/flops) );
|
|
||||||
long tries = std::max(tm0, std::min(tm1, up/flops) );
|
|
||||||
|
|
||||||
BENCH(t, tries, rep, gemv(A,B,C));
|
|
||||||
|
|
||||||
return 1e-9 * rep * flops / t.best();
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
std::vector<double> results;
|
return main_gemv(argc, argv, gemv);
|
||||||
|
|
||||||
std::ifstream settings("gemv_settings.txt");
|
|
||||||
long m, n;
|
|
||||||
while(settings >> m >> n)
|
|
||||||
{
|
|
||||||
//std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
|
|
||||||
results.push_back( bench(m, n) );
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << RowVectorXd::Map(results.data(), results.size());
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
68
bench/perf_monitoring/gemm/gemv_common.h
Normal file
68
bench/perf_monitoring/gemm/gemv_common.h
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include <functional>
|
||||||
|
#include <Eigen/Core>
|
||||||
|
#include "../../BenchTimer.h"
|
||||||
|
using namespace Eigen;
|
||||||
|
|
||||||
|
#ifndef SCALAR
|
||||||
|
#error SCALAR must be defined
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef SCALAR Scalar;
|
||||||
|
|
||||||
|
typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
|
||||||
|
typedef Matrix<Scalar,Dynamic,1> Vec;
|
||||||
|
|
||||||
|
template<typename Func>
|
||||||
|
EIGEN_DONT_INLINE
|
||||||
|
double bench(long m, long n, Func &f)
|
||||||
|
{
|
||||||
|
Mat A(m,n);
|
||||||
|
Vec B(n);
|
||||||
|
Vec C(m);
|
||||||
|
A.setRandom();
|
||||||
|
B.setRandom();
|
||||||
|
C.setRandom();
|
||||||
|
|
||||||
|
BenchTimer t;
|
||||||
|
|
||||||
|
double up = 1e8/sizeof(Scalar);
|
||||||
|
double tm0 = 4, tm1 = 10;
|
||||||
|
if(NumTraits<Scalar>::IsComplex)
|
||||||
|
{
|
||||||
|
up /= 4;
|
||||||
|
tm0 = 2;
|
||||||
|
tm1 = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
double flops = 2. * m * n;
|
||||||
|
long rep = std::max(1., std::min(100., up/flops) );
|
||||||
|
long tries = std::max(tm0, std::min(tm1, up/flops) );
|
||||||
|
|
||||||
|
BENCH(t, tries, rep, f(A,B,C));
|
||||||
|
|
||||||
|
return 1e-9 * rep * flops / t.best();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Func>
|
||||||
|
int main_gemv(int argc, char **argv, Func& f, const std::string &setting_filename)
|
||||||
|
{
|
||||||
|
std::vector<double> results;
|
||||||
|
|
||||||
|
std::string filename = std::string("gemv_settings.txt");
|
||||||
|
if(argc>1)
|
||||||
|
filename = std::string(argv[1]);
|
||||||
|
std::ifstream settings(setting_filename);
|
||||||
|
long m, n;
|
||||||
|
while(settings >> m >> n)
|
||||||
|
{
|
||||||
|
//std::cerr << " Testing " << m << " " << n << std::endl;
|
||||||
|
results.push_back( bench(m, n, f) );
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << RowVectorXd::Map(results.data(), results.size());
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
12
bench/perf_monitoring/gemm/gemvt.cpp
Normal file
12
bench/perf_monitoring/gemm/gemvt.cpp
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#include "gemv_common.h"
|
||||||
|
|
||||||
|
// Benchmark kernel for the transposed matrix-vector product: accumulates
// A^T * C into B. Only B is modified; A and C are read-only.
// noalias() is Eigen's marker that the destination does not alias the operands.
// EIGEN_DONT_INLINE presumably keeps this call out of line so the timed loop
// measures a real function call -- confirm against Eigen's macro definition.
EIGEN_DONT_INLINE
|
||||||
|
void gemv(const Mat &A, Vec &B, const Vec &C)
|
||||||
|
{
|
||||||
|
B.noalias() += A.transpose() * C;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entry point: hands the command-line arguments and the gemv kernel defined
// in this file to main_gemv (from gemv_common.h) and returns its result.
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
return main_gemv(argc, argv, gemv);
|
||||||
|
}
|
@ -84,7 +84,10 @@ int main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
std::vector<double> results;
|
std::vector<double> results;
|
||||||
|
|
||||||
std::ifstream settings("lazy_gemm_settings.txt");
|
std::string filename = std::string("lazy_gemm_settings.txt");
|
||||||
|
if(argc>1)
|
||||||
|
filename = std::string(argv[1]);
|
||||||
|
std::ifstream settings(filename);
|
||||||
long m, n, k, t;
|
long m, n, k, t;
|
||||||
while(settings >> m >> n >> k >> t)
|
while(settings >> m >> n >> k >> t)
|
||||||
{
|
{
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
# and generates $1.pdf
|
# and generates $1.pdf
|
||||||
WHAT=$1
|
WHAT=$1
|
||||||
bench=$2
|
bench=$2
|
||||||
|
settings_file=$3
|
||||||
|
|
||||||
header="rev "
|
header="rev "
|
||||||
while read line
|
while read line
|
||||||
@ -12,7 +13,7 @@ do
|
|||||||
if [ ! -z '$line' ]; then
|
if [ ! -z '$line' ]; then
|
||||||
header="$header \"$line\""
|
header="$header \"$line\""
|
||||||
fi
|
fi
|
||||||
done < $bench"_settings.txt"
|
done < $settings_file
|
||||||
|
|
||||||
echo $header > $WHAT.out.header
|
echo $header > $WHAT.out.header
|
||||||
cat $WHAT.out >> $WHAT.out.header
|
cat $WHAT.out >> $WHAT.out.header
|
||||||
@ -25,7 +26,7 @@ echo "set xtics rotate 1" >> $WHAT.gnuplot
|
|||||||
echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot
|
echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot
|
||||||
echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot
|
echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot
|
||||||
|
|
||||||
col=`cat $bench"_settings.txt" | wc -l`
|
col=`cat $settings_file | wc -l`
|
||||||
echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot
|
echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot
|
||||||
echo " " >> $WHAT.gnuplot
|
echo " " >> $WHAT.gnuplot
|
||||||
|
|
||||||
|
@ -1,17 +1,22 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# ./run.sh gemm
|
# ./run.sh gemm gemm_settings.txt
|
||||||
# ./run.sh lazy_gemm
|
# ./run.sh lazy_gemm lazy_gemm_settings.txt
|
||||||
|
# ./run.sh gemv gemv_settings.txt
|
||||||
|
# ./run.sh trmv_up gemv_square_settings.txt
|
||||||
|
# ...
|
||||||
|
|
||||||
# Examples of environment variables to be set:
|
# Examples of environment variables to be set:
|
||||||
# PREFIX="haswell-fma-"
|
# PREFIX="haswell-fma-"
|
||||||
# CXX_FLAGS="-mfma"
|
# CXX_FLAGS="-mfma"
|
||||||
|
# CXX=clang++
|
||||||
|
|
||||||
# Options:
|
# Options:
|
||||||
# -up : enforce the recomputation of existing data, and keep best results as a merging strategy
|
# -up : enforce the recomputation of existing data, and keep best results as a merging strategy
|
||||||
# -s : recompute selected changesets only and keep bests
|
# -s : recompute selected changesets only and keep bests
|
||||||
|
|
||||||
bench=$1
|
bench=$1
|
||||||
|
settings_file=$2
|
||||||
|
|
||||||
if echo "$*" | grep '\-up' > /dev/null; then
|
if echo "$*" | grep '\-up' > /dev/null; then
|
||||||
update=true
|
update=true
|
||||||
@ -88,7 +93,7 @@ function test_current
|
|||||||
fi
|
fi
|
||||||
res=$prev
|
res=$prev
|
||||||
count_rev=`echo $prev | wc -w`
|
count_rev=`echo $prev | wc -w`
|
||||||
count_ref=`cat $bench"_settings.txt" | wc -l`
|
count_ref=`cat $settings_file | wc -l`
|
||||||
if echo "$global_args" | grep "$rev" > /dev/null; then
|
if echo "$global_args" | grep "$rev" > /dev/null; then
|
||||||
rev_found=true
|
rev_found=true
|
||||||
else
|
else
|
||||||
@ -98,7 +103,7 @@ function test_current
|
|||||||
# echo $count_rev et $count_ref
|
# echo $count_rev et $count_ref
|
||||||
if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then
|
if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then
|
||||||
if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then
|
if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then
|
||||||
curr=`./$name`
|
curr=`./$name $settings_file`
|
||||||
if [ $count_rev == $count_ref ]; then
|
if [ $count_rev == $count_ref ]; then
|
||||||
echo "merge previous $prev"
|
echo "merge previous $prev"
|
||||||
echo "with new $curr"
|
echo "with new $curr"
|
||||||
@ -149,8 +154,8 @@ echo "Complex:"
|
|||||||
cat $PREFIX"c""$bench.out"
|
cat $PREFIX"c""$bench.out"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
./make_plot.sh $PREFIX"s"$bench $bench
|
./make_plot.sh $PREFIX"s"$bench $bench $settings_file
|
||||||
./make_plot.sh $PREFIX"d"$bench $bench
|
./make_plot.sh $PREFIX"d"$bench $bench $settings_file
|
||||||
./make_plot.sh $PREFIX"c"$bench $bench
|
./make_plot.sh $PREFIX"c"$bench $bench $settings_file
|
||||||
|
|
||||||
|
|
||||||
|
22
bench/perf_monitoring/gemm/runall.sh
Executable file
22
bench/perf_monitoring/gemm/runall.sh
Executable file
@ -0,0 +1,22 @@
|
|||||||
|
#!/bin/bash

# ./runall.sh
#
# Runs every monitored benchmark through run.sh, pairing each benchmark name
# with the settings file that lists its problem sizes. All arguments given to
# this script are forwarded unchanged to each run.sh invocation.

# Examples of environment variables to be set:
#   PREFIX="haswell-fma-"
#   CXX_FLAGS="-mfma"
#   CXX=clang++

# Options:
#   -up : enforce the recomputation of existing data, and keep best results as a merging strategy
#   -s  : recompute selected changesets only and keep bests

# "$@" (rather than an unquoted $*) forwards each argument verbatim, without
# re-splitting on whitespace or expanding glob characters.
./run.sh gemm      gemm_settings.txt        "$@"
./run.sh lazy_gemm lazy_gemm_settings.txt   "$@"
./run.sh gemv      gemv_settings.txt        "$@"
./run.sh gemvt     gemv_settings.txt        "$@"
./run.sh trmv_up   gemv_square_settings.txt "$@"
./run.sh trmv_lo   gemv_square_settings.txt "$@"
./run.sh trmv_upt  gemv_square_settings.txt "$@"
./run.sh trmv_lot  gemv_square_settings.txt "$@"
|
||||||
|
|
12
bench/perf_monitoring/gemm/trmv_lo.cpp
Normal file
12
bench/perf_monitoring/gemm/trmv_lo.cpp
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#include "gemv_common.h"
|
||||||
|
|
||||||
|
// Benchmark kernel for the lower-triangular matrix-vector product:
// accumulates triangularView<Lower>(A) * B into C. Only C is modified.
// noalias() is Eigen's marker that the destination does not alias the operands.
// EIGEN_DONT_INLINE presumably keeps this call out of line so the timed loop
// measures a real function call -- confirm against Eigen's macro definition.
EIGEN_DONT_INLINE
|
||||||
|
void trmv(const Mat &A, const Vec &B, Vec &C)
|
||||||
|
{
|
||||||
|
C.noalias() += A.triangularView<Lower>() * B;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entry point: hands the command-line arguments and the trmv kernel defined
// in this file to main_gemv (from gemv_common.h) and returns its result.
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
return main_gemv(argc, argv, trmv);
|
||||||
|
}
|
12
bench/perf_monitoring/gemm/trmv_lot.cpp
Normal file
12
bench/perf_monitoring/gemm/trmv_lot.cpp
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#include "gemv_common.h"
|
||||||
|
|
||||||
|
EIGEN_DONT_INLINE
|
||||||
|
void gemv(const Mat &A, Vec &B, const Vec &C)
|
||||||
|
{
|
||||||
|
B.noalias() += A.transpose().triangularView<Lower>() * C;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
return main_gemv(argc, argv, trmv);
|
||||||
|
}
|
12
bench/perf_monitoring/gemm/trmv_up.cpp
Normal file
12
bench/perf_monitoring/gemm/trmv_up.cpp
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#include "gemv_common.h"
|
||||||
|
|
||||||
|
// Benchmark kernel for the upper-triangular matrix-vector product:
// accumulates triangularView<Upper>(A) * B into C. Only C is modified.
// noalias() is Eigen's marker that the destination does not alias the operands.
// EIGEN_DONT_INLINE presumably keeps this call out of line so the timed loop
// measures a real function call -- confirm against Eigen's macro definition.
EIGEN_DONT_INLINE
|
||||||
|
void trmv(const Mat &A, const Vec &B, Vec &C)
|
||||||
|
{
|
||||||
|
C.noalias() += A.triangularView<Upper>() * B;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entry point: hands the command-line arguments and the trmv kernel defined
// in this file to main_gemv (from gemv_common.h) and returns its result.
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
return main_gemv(argc, argv, trmv);
|
||||||
|
}
|
12
bench/perf_monitoring/gemm/trmv_upt.cpp
Normal file
12
bench/perf_monitoring/gemm/trmv_upt.cpp
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#include "gemv_common.h"
|
||||||
|
|
||||||
|
// Benchmark kernel for the transposed upper-triangular matrix-vector product:
// accumulates triangularView<Upper>(A^T) * C into B. Only B is modified.
// noalias() is Eigen's marker that the destination does not alias the operands.
// EIGEN_DONT_INLINE presumably keeps this call out of line so the timed loop
// measures a real function call -- confirm against Eigen's macro definition.
EIGEN_DONT_INLINE
|
||||||
|
void trmv(const Mat &A, Vec &B, const Vec &C)
|
||||||
|
{
|
||||||
|
B.noalias() += A.transpose().triangularView<Upper>() * C;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Entry point: hands the command-line arguments and the trmv kernel defined
// in this file to main_gemv (from gemv_common.h) and returns its result.
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
return main_gemv(argc, argv, trmv);
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user