mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-19 08:09:36 +08:00
Restrict new AVX512 trsm to AVX512VL, rename files for consistency.
This commit is contained in:
parent
67eeba6e72
commit
07db964bde
@ -190,7 +190,9 @@ using std::ptrdiff_t;
|
|||||||
#include "src/Core/arch/SSE/MathFunctions.h"
|
#include "src/Core/arch/SSE/MathFunctions.h"
|
||||||
#include "src/Core/arch/AVX/MathFunctions.h"
|
#include "src/Core/arch/AVX/MathFunctions.h"
|
||||||
#include "src/Core/arch/AVX512/MathFunctions.h"
|
#include "src/Core/arch/AVX512/MathFunctions.h"
|
||||||
#include "src/Core/arch/AVX512/trsmKernel_impl.hpp"
|
#ifdef __AVX512VL__
|
||||||
|
#include "src/Core/arch/AVX512/TrsmKernel.h"
|
||||||
|
#endif
|
||||||
#elif defined EIGEN_VECTORIZE_AVX
|
#elif defined EIGEN_VECTORIZE_AVX
|
||||||
// Use AVX for floats and doubles, SSE for integers
|
// Use AVX for floats and doubles, SSE for integers
|
||||||
#include "src/Core/arch/SSE/PacketMath.h"
|
#include "src/Core/arch/SSE/PacketMath.h"
|
||||||
|
@ -237,7 +237,7 @@ template<> struct unpacket_traits<Packet8f> {
|
|||||||
typedef Packet8i integer_packet;
|
typedef Packet8i integer_packet;
|
||||||
typedef uint8_t mask_t;
|
typedef uint8_t mask_t;
|
||||||
enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=true
|
enum {size=8, alignment=Aligned32, vectorizable=true, masked_load_available=true, masked_store_available=true
|
||||||
#ifdef EIGEN_VECTORIZE_AVX512
|
#ifdef __AVX512VL__
|
||||||
, masked_fpops_available=true
|
, masked_fpops_available=true
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
@ -468,7 +468,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { r
|
|||||||
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
|
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
|
template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
|
||||||
#ifdef EIGEN_VECTORIZE_AVX512
|
#ifdef __AVX512VL__
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b, uint8_t umask) {
|
EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b, uint8_t umask) {
|
||||||
__mmask8 mask = static_cast<__mmask8>(umask);
|
__mmask8 mask = static_cast<__mmask8>(umask);
|
||||||
@ -859,7 +859,7 @@ template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { E
|
|||||||
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
|
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from, uint8_t umask) {
|
template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from, uint8_t umask) {
|
||||||
#ifdef EIGEN_VECTORIZE_AVX512
|
#ifdef __AVX512VL__
|
||||||
__mmask8 mask = static_cast<__mmask8>(umask);
|
__mmask8 mask = static_cast<__mmask8>(umask);
|
||||||
EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_maskz_loadu_ps(mask, from);
|
EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_maskz_loadu_ps(mask, from);
|
||||||
#else
|
#else
|
||||||
@ -927,7 +927,7 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d&
|
|||||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
|
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from, uint8_t umask) {
|
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from, uint8_t umask) {
|
||||||
#ifdef EIGEN_VECTORIZE_AVX512
|
#ifdef __AVX512VL__
|
||||||
__mmask8 mask = static_cast<__mmask8>(umask);
|
__mmask8 mask = static_cast<__mmask8>(umask);
|
||||||
EIGEN_DEBUG_UNALIGNED_STORE return _mm256_mask_storeu_ps(to, mask, from);
|
EIGEN_DEBUG_UNALIGNED_STORE return _mm256_mask_storeu_ps(to, mask, from);
|
||||||
#else
|
#else
|
||||||
|
@ -247,7 +247,7 @@ template <>
|
|||||||
EIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) {
|
EIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) {
|
||||||
#if (EIGEN_COMP_GNUC != 0) || (EIGEN_COMP_CLANG != 0)
|
#if (EIGEN_COMP_GNUC != 0) || (EIGEN_COMP_CLANG != 0)
|
||||||
// Inline asm here helps reduce some register spilling in TRSM kernels.
|
// Inline asm here helps reduce some register spilling in TRSM kernels.
|
||||||
// See note in unrolls::gemm::microKernel in trsmKernel_impl.hpp
|
// See note in unrolls::gemm::microKernel in TrsmKernel.h
|
||||||
Packet16f ret;
|
Packet16f ret;
|
||||||
__asm__ ("vbroadcastss %[mem], %[dst]" : [dst] "=v" (ret) : [mem] "m" (*from));
|
__asm__ ("vbroadcastss %[mem], %[dst]" : [dst] "=v" (ret) : [mem] "m" (*from));
|
||||||
return ret;
|
return ret;
|
||||||
@ -300,6 +300,7 @@ EIGEN_STRONG_INLINE Packet16i padd<Packet16i>(const Packet16i& a,
|
|||||||
const Packet16i& b) {
|
const Packet16i& b) {
|
||||||
return _mm512_add_epi32(a, b);
|
return _mm512_add_epi32(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a,
|
EIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a,
|
||||||
const Packet16f& b,
|
const Packet16f& b,
|
||||||
@ -800,6 +801,7 @@ EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
|
|||||||
EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
|
EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
|
||||||
reinterpret_cast<const __m512i*>(from));
|
reinterpret_cast<const __m512i*>(from));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet16f ploadu<Packet16f>(const float* from, uint16_t umask) {
|
EIGEN_STRONG_INLINE Packet16f ploadu<Packet16f>(const float* from, uint16_t umask) {
|
||||||
__mmask16 mask = static_cast<__mmask16>(umask);
|
__mmask16 mask = static_cast<__mmask16>(umask);
|
||||||
|
@ -38,8 +38,9 @@ typedef Packet8d vecFullDouble;
|
|||||||
typedef Packet8f vecHalfFloat;
|
typedef Packet8f vecHalfFloat;
|
||||||
typedef Packet4d vecHalfDouble;
|
typedef Packet4d vecHalfDouble;
|
||||||
|
|
||||||
// Compile-time unrolls are implemented here
|
// Compile-time unrolls are implemented here.
|
||||||
#include "unrolls_impl.hpp"
|
// Note: this depends on macros and typedefs above.
|
||||||
|
#include "TrsmUnrolls.inc"
|
||||||
|
|
||||||
|
|
||||||
#if defined(EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0)
|
#if defined(EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0)
|
@ -7,7 +7,7 @@
|
|||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
#ifdef EIGEN_TEST_PART_100
|
#if defined(EIGEN_TEST_PART_100) || defined(EIGEN_TEST_PART_ALL)
|
||||||
# define EIGEN_NO_DEPRECATED_WARNING
|
# define EIGEN_NO_DEPRECATED_WARNING
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user