From 6973687c70a722cd19263152c2f6c1e49862a18f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20S=C3=A1nchez?= Date: Tue, 15 Mar 2022 20:22:23 +0000 Subject: [PATCH] Fix up PowerPC MMA flags so it builds by default. (cherry picked from commit 65eeedf9646ee6efc457cc3a8f8d9030a6f83689) --- Eigen/src/Core/arch/AltiVec/MatrixProduct.h | 211 +++++++++--------- .../src/Core/arch/AltiVec/MatrixProductMMA.h | 9 +- 2 files changed, 115 insertions(+), 105 deletions(-) diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h index 8feb88ea7..1888347b1 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProduct.h @@ -17,24 +17,27 @@ #include "MatrixProductCommon.h" -// Since LLVM doesn't support dynamic dispatching, force either always MMA or VSX -#if EIGEN_COMP_LLVM -#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) && !defined(EIGEN_ALTIVEC_MMA_ONLY) -#ifdef __MMA__ -#define EIGEN_ALTIVEC_MMA_ONLY -#else -#define EIGEN_ALTIVEC_DISABLE_MMA -#endif -#endif -#endif - -#ifdef __has_builtin +// Check for MMA builtin support. +#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) && defined(__has_builtin) #if __has_builtin(__builtin_mma_assemble_acc) - #define ALTIVEC_MMA_SUPPORT + #define EIGEN_ALTIVEC_MMA_SUPPORT #endif #endif -#if defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) +// Check if and how we should actually use MMA if supported. +#if defined(EIGEN_ALTIVEC_MMA_SUPPORT) + +// Use MMA by default if available. +#if defined(__MMA__) +#define EIGEN_ALTIVEC_MMA_ONLY 1 +// Otherwise, check if we want to enable dynamic dispatch. Not supported by LLVM. +#elif defined(EIGEN_ALTIVEC_ENABLE_MMA_DYNAMIC_DISPATCH) && !defined(EIGEN_COMP_LLVM) +#define EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH 1 +#endif + +#endif // EIGEN_ALTIVEC_MMA_SUPPORT + +#if defined(EIGEN_ALTIVEC_MMA_ONLY) || defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) #include "MatrixProductMMA.h" #endif @@ -2477,10 +2480,10 @@ void gebp_kernel::size; void (*gemm_function)(const DataMapper&, const float*, const float*, Index, Index, Index, float, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY + #if defined(EIGEN_ALTIVEC_MMA_ONLY) //generate with MMA only gemm_function = &Eigen::internal::gemmMMA; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ gemm_function = &Eigen::internal::gemmMMA; } @@ -2490,7 +2493,7 @@ void gebp_kernel; #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2516,20 +2519,20 @@ void gebp_kernel, std::complex, Index, DataMapper, mr void (*gemm_function)(const DataMapper&, const std::complex*, const std::complex*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2554,20 +2557,20 @@ void gebp_kernel, Index, DataMapper, mr, nr, Conjugat const Index accCols = quad_traits::size; void (*gemm_function)(const DataMapper&, const float*, const std::complex*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2592,20 +2595,20 @@ void gebp_kernel, float, Index, DataMapper, mr, nr, Conjugat const Index accCols = quad_traits::size; void (*gemm_function)(const DataMapper&, const std::complex*, const float*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, float, std::complex, float, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2629,10 +2632,10 @@ void gebp_kernel::size; void (*gemm_function)(const DataMapper&, const double*, const double*, Index, Index, Index, double, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY + #if defined(EIGEN_ALTIVEC_MMA_ONLY) //generate with MMA only gemm_function = &Eigen::internal::gemmMMA; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ gemm_function = &Eigen::internal::gemmMMA; } @@ -2642,7 +2645,7 @@ void gebp_kernel; #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2667,20 +2670,20 @@ void gebp_kernel, std::complex, Index, DataMapper, const Index accCols = quad_traits::size; void (*gemm_function)(const DataMapper&, const std::complex*, const std::complex*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, std::complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, false>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2705,20 +2708,20 @@ void gebp_kernel, double, Index, DataMapper, mr, nr, Conjug const Index accCols = quad_traits::size; void (*gemm_function)(const DataMapper&, const std::complex*, const double*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, double, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, false, true>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } template @@ -2743,20 +2746,20 @@ void gebp_kernel, Index, DataMapper, mr, nr, Conjug const Index accCols = quad_traits::size; void (*gemm_function)(const DataMapper&, const double*, const std::complex*, Index, Index, Index, std::complex, Index, Index, Index, Index); - #ifdef EIGEN_ALTIVEC_MMA_ONLY - //generate with MMA only - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA) - if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ - gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - } - else{ - gemm_function = &Eigen::internal::gemm_complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - } - #else - gemm_function = &Eigen::internal::gemm_complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; - #endif - gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + #if defined(EIGEN_ALTIVEC_MMA_ONLY) + //generate with MMA only + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) + if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma")){ + gemm_function = &Eigen::internal::gemm_complexMMA, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + } + else{ + gemm_function = &Eigen::internal::gemm_complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + } + #else + gemm_function = &Eigen::internal::gemm_complex, std::complex, double, Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs, true, false>; + #endif + gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } } // end namespace internal diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h index 5b4449537..7dda42339 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h @@ -11,7 +11,11 @@ #ifndef EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H #define EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H +// If using dynamic dispatch, set the CPU target. +#if defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) +#pragma GCC push_options #pragma GCC target("cpu=power10,htm") +#endif #ifdef __has_builtin #if !__has_builtin(__builtin_vsx_assemble_pair) @@ -611,10 +615,13 @@ void gemm_complexMMA(const DataMapper& res, const LhsScalar* blockAc, const RhsS #undef advanceRows #undef advanceCols -#pragma GCC reset_options } // end namespace internal } // end namespace Eigen +#if defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH) +#pragma GCC pop_options +#endif + #endif // EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H