mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-13 08:18:06 +08:00
Sync from Head.
This commit is contained in:
commit
a0d3ac760f
@ -70,8 +70,10 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
|||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
|
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
|
||||||
|
#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
|
||||||
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
|
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
|
||||||
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
|
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
|
||||||
|
#endif
|
||||||
|
|
||||||
eigen_assert(size() == other.size());
|
eigen_assert(size() == other.size());
|
||||||
|
|
||||||
|
@ -224,50 +224,65 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
|||||||
// on the other hand, it is good for the cache to pack the vector anyway...
|
// on the other hand, it is good for the cache to pack the vector anyway...
|
||||||
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
|
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
|
||||||
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
||||||
MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
|
MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
|
||||||
};
|
};
|
||||||
|
|
||||||
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
|
||||||
|
|
||||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
|
||||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
|
||||||
|
|
||||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
|
||||||
|
|
||||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
|
||||||
evalToDest ? dest.data() : static_dest.data());
|
|
||||||
|
|
||||||
if(!evalToDest)
|
|
||||||
{
|
|
||||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
|
||||||
Index size = dest.size();
|
|
||||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
|
||||||
#endif
|
|
||||||
if(!alphaIsCompatible)
|
|
||||||
{
|
|
||||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
|
||||||
compatibleAlpha = RhsScalar(1);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
|
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
|
||||||
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
|
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
|
||||||
general_matrix_vector_product
|
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
|
||||||
actualLhs.rows(), actualLhs.cols(),
|
|
||||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
|
||||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
|
||||||
actualDestPtr, 1,
|
|
||||||
compatibleAlpha);
|
|
||||||
|
|
||||||
if (!evalToDest)
|
if(!MightCannotUseDest)
|
||||||
{
|
{
|
||||||
if(!alphaIsCompatible)
|
// shortcut if we are sure to be able to use dest directly,
|
||||||
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
// this helps the compiler generate cleaner and more optimized code for the most common cases
|
||||||
else
|
general_matrix_vector_product
|
||||||
dest = MappedDest(actualDestPtr, dest.size());
|
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||||
|
actualLhs.rows(), actualLhs.cols(),
|
||||||
|
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||||
|
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||||
|
dest.data(), 1,
|
||||||
|
compatibleAlpha);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||||
|
|
||||||
|
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||||
|
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||||
|
|
||||||
|
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||||
|
evalToDest ? dest.data() : static_dest.data());
|
||||||
|
|
||||||
|
if(!evalToDest)
|
||||||
|
{
|
||||||
|
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||||
|
Index size = dest.size();
|
||||||
|
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||||
|
#endif
|
||||||
|
if(!alphaIsCompatible)
|
||||||
|
{
|
||||||
|
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||||
|
compatibleAlpha = RhsScalar(1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
general_matrix_vector_product
|
||||||
|
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||||
|
actualLhs.rows(), actualLhs.cols(),
|
||||||
|
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||||
|
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||||
|
actualDestPtr, 1,
|
||||||
|
compatibleAlpha);
|
||||||
|
|
||||||
|
if (!evalToDest)
|
||||||
|
{
|
||||||
|
if(!alphaIsCompatible)
|
||||||
|
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||||
|
else
|
||||||
|
dest = MappedDest(actualDestPtr, dest.size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -763,6 +763,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
|
|||||||
{
|
{
|
||||||
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
|
// NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
|
||||||
const bool is_integer = NumTraits<T>::IsInteger;
|
const bool is_integer = NumTraits<T>::IsInteger;
|
||||||
|
EIGEN_UNUSED_VARIABLE(is_integer);
|
||||||
EIGEN_STATIC_ASSERT(is_integer,
|
EIGEN_STATIC_ASSERT(is_integer,
|
||||||
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
|
||||||
resize(size);
|
resize(size);
|
||||||
|
@ -158,10 +158,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
|
|||||||
static EIGEN_STRONG_INLINE
|
static EIGEN_STRONG_INLINE
|
||||||
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
||||||
{
|
{
|
||||||
Index dstRows = src.rows();
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||||
Index dstCols = src.cols();
|
|
||||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
|
||||||
dst.resize(dstRows, dstCols);
|
|
||||||
// FIXME shall we handle nested_eval here?
|
// FIXME shall we handle nested_eval here?
|
||||||
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
|
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
|
||||||
}
|
}
|
||||||
@ -176,10 +173,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
|
|||||||
static EIGEN_STRONG_INLINE
|
static EIGEN_STRONG_INLINE
|
||||||
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
||||||
{
|
{
|
||||||
Index dstRows = src.rows();
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||||
Index dstCols = src.cols();
|
|
||||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
|
||||||
dst.resize(dstRows, dstCols);
|
|
||||||
// FIXME shall we handle nested_eval here?
|
// FIXME shall we handle nested_eval here?
|
||||||
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
|
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
|
||||||
}
|
}
|
||||||
@ -377,7 +371,6 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
|||||||
{
|
{
|
||||||
LhsNested actual_lhs(lhs);
|
LhsNested actual_lhs(lhs);
|
||||||
RhsNested actual_rhs(rhs);
|
RhsNested actual_rhs(rhs);
|
||||||
|
|
||||||
internal::gemv_dense_selector<Side,
|
internal::gemv_dense_selector<Side,
|
||||||
(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||||
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
|
bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
|
||||||
|
@ -523,7 +523,7 @@ template<typename T> struct smart_memmove_helper<T,true> {
|
|||||||
template<typename T> struct smart_memmove_helper<T,false> {
|
template<typename T> struct smart_memmove_helper<T,false> {
|
||||||
static inline void run(const T* start, const T* end, T* target)
|
static inline void run(const T* start, const T* end, T* target)
|
||||||
{
|
{
|
||||||
if (uintptr_t(target) < uintptr_t(start))
|
if (UIntPtr(target) < UIntPtr(start))
|
||||||
{
|
{
|
||||||
std::copy(start, end, target);
|
std::copy(start, end, target);
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
#include "action_atv_product.hh"
|
#include "action_atv_product.hh"
|
||||||
|
|
||||||
#include "action_matrix_matrix_product.hh"
|
#include "action_matrix_matrix_product.hh"
|
||||||
// #include "action_ata_product.hh"
|
#include "action_ata_product.hh"
|
||||||
#include "action_aat_product.hh"
|
#include "action_aat_product.hh"
|
||||||
|
|
||||||
#include "action_trisolve.hh"
|
#include "action_trisolve.hh"
|
||||||
|
@ -46,9 +46,9 @@ public :
|
|||||||
BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
|
BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
|
||||||
}
|
}
|
||||||
|
|
||||||
// static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
|
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
|
||||||
// ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N);
|
BLAS_FUNC(syrk)(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N);
|
||||||
// }
|
}
|
||||||
|
|
||||||
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
|
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
|
||||||
BLAS_FUNC(syrk)(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N);
|
BLAS_FUNC(syrk)(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N);
|
||||||
|
@ -48,7 +48,7 @@ int main()
|
|||||||
bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
|
||||||
|
|
||||||
bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
// bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
@ -78,18 +78,18 @@ public :
|
|||||||
cible[i][j]=source[i][j];
|
cible[i][j]=source[i][j];
|
||||||
}
|
}
|
||||||
|
|
||||||
// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
|
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||||
// {
|
{
|
||||||
// real somme;
|
real somme;
|
||||||
// for (int j=0;j<N;j++){
|
for (int j=0;j<N;j++){
|
||||||
// for (int i=0;i<N;i++){
|
for (int i=0;i<N;i++){
|
||||||
// somme=0.0;
|
somme=0.0;
|
||||||
// for (int k=0;k<N;k++)
|
for (int k=0;k<N;k++)
|
||||||
// somme += A[i][k]*A[j][k];
|
somme += A[i][k]*A[j][k];
|
||||||
// X[j][i]=somme;
|
X[j][i]=somme;
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
|
|
||||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
|
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
|
||||||
{
|
{
|
||||||
|
@ -80,35 +80,35 @@ public :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
static EIGEN_DONT_INLINE void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||||
X = (A*B);
|
X = (A*B);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
static EIGEN_DONT_INLINE void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
|
||||||
X = (trans(A)*trans(B));
|
X = (trans(A)*trans(B));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
|
static EIGEN_DONT_INLINE void ata_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||||
X = (trans(A)*A);
|
X = (trans(A)*A);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
static EIGEN_DONT_INLINE void aat_product(const gene_matrix & A, gene_matrix & X, int N){
|
||||||
X = (A*trans(A));
|
X = (A*trans(A));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
static EIGEN_DONT_INLINE void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||||
X = (A*B);
|
X = (A*B);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
static EIGEN_DONT_INLINE void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
|
||||||
X = (trans(A)*B);
|
X = (trans(A)*B);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
|
static EIGEN_DONT_INLINE void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
|
||||||
Y += coef * X;
|
Y += coef * X;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
static EIGEN_DONT_INLINE void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
|
||||||
Y = a*X + b*Y;
|
Y = a*X + b*Y;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,9 +30,9 @@ int main()
|
|||||||
|
|
||||||
bench<Action_matrix_vector_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_matrix_vector_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
bench<Action_atv_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
bench<Action_atv_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
|
||||||
// bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
// bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
// bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -92,9 +92,11 @@ public :
|
|||||||
X.noalias() = A.transpose()*B.transpose();
|
X.noalias() = A.transpose()*B.transpose();
|
||||||
}
|
}
|
||||||
|
|
||||||
// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int /*N*/){
|
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int /*N*/){
|
||||||
// X.noalias() = A.transpose()*A;
|
//X.noalias() = A.transpose()*A;
|
||||||
// }
|
X.template triangularView<Lower>().setZero();
|
||||||
|
X.template selfadjointView<Lower>().rankUpdate(A.transpose());
|
||||||
|
}
|
||||||
|
|
||||||
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int /*N*/){
|
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int /*N*/){
|
||||||
X.template triangularView<Lower>().setZero();
|
X.template triangularView<Lower>().setZero();
|
||||||
|
@ -25,7 +25,7 @@ BTL_MAIN;
|
|||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
bench<Action_matrix_matrix_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_matrix_matrix_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
// bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_ata_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
bench<Action_aat_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_aat_product<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
bench<Action_trmm<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
bench<Action_trmm<eigen3_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||||
|
|
||||||
|
@ -59,3 +59,6 @@ before-evaluators
|
|||||||
9174:d228bc282ac9 # merge
|
9174:d228bc282ac9 # merge
|
||||||
9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955
|
9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955
|
||||||
9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775
|
9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775
|
||||||
|
3.3-beta2
|
||||||
|
3.3-rc1
|
||||||
|
3.3.0
|
||||||
|
68
bench/perf_monitoring/gemm/gemv.cpp
Normal file
68
bench/perf_monitoring/gemm/gemv.cpp
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include <Eigen/Core>
|
||||||
|
#include "../../BenchTimer.h"
|
||||||
|
using namespace Eigen;
|
||||||
|
|
||||||
|
#ifndef SCALAR
|
||||||
|
#error SCALAR must be defined
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef SCALAR Scalar;
|
||||||
|
|
||||||
|
typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
|
||||||
|
typedef Matrix<Scalar,Dynamic,1> Vec;
|
||||||
|
|
||||||
|
EIGEN_DONT_INLINE
|
||||||
|
void gemv(const Mat &A, const Vec &B, Vec &C)
|
||||||
|
{
|
||||||
|
C.noalias() += A * B;
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DONT_INLINE
|
||||||
|
double bench(long m, long n)
|
||||||
|
{
|
||||||
|
Mat A(m,n);
|
||||||
|
Vec B(n);
|
||||||
|
Vec C(m);
|
||||||
|
A.setRandom();
|
||||||
|
B.setRandom();
|
||||||
|
C.setZero();
|
||||||
|
|
||||||
|
BenchTimer t;
|
||||||
|
|
||||||
|
double up = 1e9*4/sizeof(Scalar);
|
||||||
|
double tm0 = 4, tm1 = 10;
|
||||||
|
if(NumTraits<Scalar>::IsComplex)
|
||||||
|
{
|
||||||
|
up /= 4;
|
||||||
|
tm0 = 2;
|
||||||
|
tm1 = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
double flops = 2. * m * n;
|
||||||
|
long rep = std::max(1., std::min(100., up/flops) );
|
||||||
|
long tries = std::max(tm0, std::min(tm1, up/flops) );
|
||||||
|
|
||||||
|
BENCH(t, tries, rep, gemv(A,B,C));
|
||||||
|
|
||||||
|
return 1e-9 * rep * flops / t.best();
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::vector<double> results;
|
||||||
|
|
||||||
|
std::ifstream settings("gemv_settings.txt");
|
||||||
|
long m, n;
|
||||||
|
while(settings >> m >> n)
|
||||||
|
{
|
||||||
|
//std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
|
||||||
|
results.push_back( bench(m, n) );
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << RowVectorXd::Map(results.data(), results.size());
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
11
bench/perf_monitoring/gemm/gemv_settings.txt
Normal file
11
bench/perf_monitoring/gemm/gemv_settings.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
8 8
|
||||||
|
9 9
|
||||||
|
24 24
|
||||||
|
239 239
|
||||||
|
240 240
|
||||||
|
2400 24
|
||||||
|
24 2400
|
||||||
|
24 240
|
||||||
|
2400 2400
|
||||||
|
4800 23
|
||||||
|
23 4800
|
Loading…
x
Reference in New Issue
Block a user