diff --git a/Eigen/src/Core/arch/AVX512/GemmKernel.h b/Eigen/src/Core/arch/AVX512/GemmKernel.h index 477c50fa5..f089e695b 100644 --- a/Eigen/src/Core/arch/AVX512/GemmKernel.h +++ b/Eigen/src/Core/arch/AVX512/GemmKernel.h @@ -20,6 +20,10 @@ #include "../../InternalHeaderCheck.h" +#if !defined(EIGEN_USE_AVX512_GEMM_KERNELS) +#define EIGEN_USE_AVX512_GEMM_KERNELS 1 +#endif + #define SECOND_FETCH (32) #if (EIGEN_COMP_GNUC_STRICT != 0) && !defined(EIGEN_ARCH_AVX512_GEMM_KERNEL_USE_LESS_A_REGS) // Use less registers to load A elements to workaround compiler spills. Loose a @@ -930,6 +934,8 @@ EIGEN_DONT_INLINE void gemm_kern_avx512(Index m, Index n, Index k, Scalar *alpha g.template compute_kern(); } +// Template specializations of GEBP kernels with nr = 8. +#if EIGEN_USE_AVX512_GEMM_KERNELS template class gebp_traits : public gebp_traits { @@ -1218,6 +1224,7 @@ EIGEN_ALWAYS_INLINE void gebp_kernel(cutoff_d); return (cutoff_l / EIGEN_AVX_MAX_NUM_ROW) * EIGEN_AVX_MAX_NUM_ROW; } +#else // !(EIGEN_USE_AVX512_TRSM_KERNELS) || !(EIGEN_COMP_CLANG != 0) +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_CUTOFFS 0 +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS 0 +#define EIGEN_ENABLE_AVX512_NOCOPY_TRSM_L_CUTOFFS 0 #endif /**