Add option to disable avx512 GEBP kernels

This commit is contained in:
b-shi 2022-07-18 17:59:09 +00:00 committed by Antonio Sánchez
parent 1092574b26
commit 4a56359406
2 changed files with 11 additions and 0 deletions

View File

@ -20,6 +20,10 @@
#include "../../InternalHeaderCheck.h"
// Switch for the AVX512 GEBP kernel specializations in this file.
// Defaults to 1 (enabled) unless the macro is already defined at this point;
// code wrapped in `#if EIGEN_USE_AVX512_GEMM_KERNELS` is compiled only when it is non-zero.
#ifndef EIGEN_USE_AVX512_GEMM_KERNELS
#define EIGEN_USE_AVX512_GEMM_KERNELS 1
#endif
#define SECOND_FETCH (32)
#if (EIGEN_COMP_GNUC_STRICT != 0) && !defined(EIGEN_ARCH_AVX512_GEMM_KERNEL_USE_LESS_A_REGS)
// Use fewer registers to load A elements to work around compiler spills. Lose a
@ -930,6 +934,8 @@ EIGEN_DONT_INLINE void gemm_kern_avx512(Index m, Index n, Index k, Scalar *alpha
g.template compute_kern<max_a_unroll, max_b_unroll>();
}
// Template specializations of GEBP kernels with nr = 8.
#if EIGEN_USE_AVX512_GEMM_KERNELS
template <bool ConjLhs_, bool ConjRhs_, int PacketSize_>
class gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Target, PacketSize_>
: public gebp_traits<float, float, ConjLhs_, ConjRhs_, Architecture::Generic, PacketSize_> {
@ -1218,6 +1224,7 @@ EIGEN_ALWAYS_INLINE void gebp_kernel<Scalar, Scalar, Index, DataMapper, mr, 8, C
}
}
}
#endif // EIGEN_USE_AVX512_GEMM_KERNELS
} // namespace internal
} // namespace Eigen

View File

@ -106,6 +106,10 @@ int64_t avx512_trsm_cutoff(int64_t L2Size, int64_t N, double L2Cap) {
int64_t cutoff_l = static_cast<int64_t>(cutoff_d);
return (cutoff_l / EIGEN_AVX_MAX_NUM_ROW) * EIGEN_AVX_MAX_NUM_ROW;
}
#else // !(EIGEN_USE_AVX512_TRSM_KERNELS) || !(EIGEN_COMP_CLANG != 0)
#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_CUTOFFS 0
#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS 0
#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS 0
#endif
/**