diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index ceb7a0ad7..f3d607aeb 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -206,6 +206,17 @@ struct type_casting_traits { }; }; +// provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types +template +struct vectorized_type_casting_traits { + enum : int { + DefaultSrcPacketSize = packet_traits::size, + DefaultTgtPacketSize = packet_traits::size, + VectorizedCast = 1, + SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1), + TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1) + }; +}; /** \internal Wrapper to ensure that multiple packet types can map to the same same underlying vector type. */ diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h index 461f3a637..98533477b 100644 --- a/Eigen/src/Core/arch/AVX/TypeCasting.h +++ b/Eigen/src/Core/arch/AVX/TypeCasting.h @@ -17,76 +17,24 @@ namespace Eigen { namespace internal { #ifndef EIGEN_VECTORIZE_AVX512 -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; +template<> struct 
type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 2, - TgtCoeffRatio = 1 - }; -}; -#endif // EIGEN_VECTORIZE_AVX512 - -template<> EIGEN_STRONG_INLINE Packet8i pcast(const Packet8f& a) { - return _mm256_cvttps_epi32(a); -} - -template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet8i& a) { - return _mm256_cvtepi32_ps(a); -} - -template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet4d& a, const Packet4d& b) { - return _mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a)); -} - -template<> EIGEN_STRONG_INLINE Packet8i pcast(const Packet4d& a, const Packet4d& b) { - return _mm256_set_m128i(_mm256_cvttpd_epi32(b), _mm256_cvttpd_epi32(a)); -} - -template <> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4d& a) { - return _mm256_cvtpd_ps(a); -} - -template <> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4d& a) { - return _mm256_cvttpd_epi32(a); -} +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +#endif template <> EIGEN_STRONG_INLINE Packet16b pcast(const Packet8f& a, @@ -118,6 +66,63 @@ EIGEN_STRONG_INLINE Packet16b pcast(const Packet8f& a, #endif } +template <> +EIGEN_STRONG_INLINE Packet8f pcast(const Packet16b& a) { + const __m256 cst_one = _mm256_set1_ps(1.0f); + #ifdef EIGEN_VECTORIZE_AVX2 + __m256i a_extended = _mm256_cvtepi8_epi32(a); + __m256i abcd_efgh = _mm256_cmpeq_epi32(a_extended, _mm256_setzero_si256()); + #else + __m128i abcd_efhg_ijkl_mnop = _mm_cmpeq_epi8(a, _mm_setzero_si128()); + __m128i 
aabb_ccdd_eeff_gghh = _mm_unpacklo_epi8(abcd_efhg_ijkl_mnop, abcd_efhg_ijkl_mnop); + __m128i aaaa_bbbb_cccc_dddd = _mm_unpacklo_epi8(aabb_ccdd_eeff_gghh, aabb_ccdd_eeff_gghh); + __m128i eeee_ffff_gggg_hhhh = _mm_unpackhi_epi8(aabb_ccdd_eeff_gghh, aabb_ccdd_eeff_gghh); + __m256i abcd_efgh = _mm256_setr_m128i(aaaa_bbbb_cccc_dddd, eeee_ffff_gggg_hhhh); + #endif + __m256 result = _mm256_andnot_ps(_mm256_castsi256_ps(abcd_efgh), cst_one); + return result; +} + +template<> EIGEN_STRONG_INLINE Packet8i pcast(const Packet8f& a) { + return _mm256_cvttps_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet8i pcast(const Packet4d& a, const Packet4d& b) { + return _mm256_set_m128i(_mm256_cvttpd_epi32(b), _mm256_cvttpd_epi32(a)); +} + +template <> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4d& a) { + return _mm256_cvttpd_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet8i& a) { + return _mm256_cvtepi32_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet4d& a, const Packet4d& b) { + return _mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a)); +} + +template <> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4d& a) { + return _mm256_cvtpd_ps(a); +} + +template <> EIGEN_STRONG_INLINE Packet4d pcast(const Packet8i& a) { + return _mm256_cvtepi32_pd(_mm256_castsi256_si128(a)); +} + +template <> EIGEN_STRONG_INLINE Packet4d pcast(const Packet4i& a) { + return _mm256_cvtepi32_pd(a); +} + +template <> EIGEN_STRONG_INLINE Packet4d pcast(const Packet8f& a) { + return _mm256_cvtps_pd(_mm256_castps256_ps128(a)); +} + +template <> EIGEN_STRONG_INLINE Packet4d pcast(const Packet4f& a) { + return _mm256_cvtps_pd(a); +} + template<> EIGEN_STRONG_INLINE Packet8i preinterpret(const Packet8f& a) { return _mm256_castps_si256(a); } diff --git a/Eigen/src/Core/arch/AVX512/TypeCasting.h b/Eigen/src/Core/arch/AVX512/TypeCasting.h index 2f38d7f80..02c56282f 100644 --- a/Eigen/src/Core/arch/AVX512/TypeCasting.h +++ 
b/Eigen/src/Core/arch/AVX512/TypeCasting.h @@ -16,23 +16,23 @@ namespace Eigen { namespace internal { -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; + +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; + +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; + +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; + +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; template<> EIGEN_STRONG_INLINE Packet16b pcast(const Packet16f& a) { __mmask16 mask = _mm512_cmpneq_ps_mask(a, pzero(a)); @@ -47,10 +47,26 @@ template<> EIGEN_STRONG_INLINE Packet16i pcast(const Packe return _mm512_cvttps_epi32(a); } +template<> EIGEN_STRONG_INLINE Packet8d pcast(const Packet16f& a) { + return _mm512_cvtps_pd(_mm512_castps512_ps256(a)); +} + +template<> EIGEN_STRONG_INLINE Packet8d pcast(const Packet8f& a) { + return _mm512_cvtps_pd(a); +} + template<> EIGEN_STRONG_INLINE Packet16f pcast(const Packet16i& a) { return _mm512_cvtepi32_ps(a); } +template<> EIGEN_STRONG_INLINE Packet8d pcast(const Packet16i& a) { + return _mm512_cvtepi32_pd(_mm512_castsi512_si256(a)); +} + +template<> EIGEN_STRONG_INLINE 
Packet8d pcast(const Packet8i& a) { + return _mm512_cvtepi32_pd(a); +} + template<> EIGEN_STRONG_INLINE Packet16f pcast(const Packet8d& a, const Packet8d& b) { return cat256(_mm512_cvtpd_ps(a), _mm512_cvtpd_ps(b)); } @@ -131,80 +147,26 @@ template<> EIGEN_STRONG_INLINE Packet8bf preinterpret(con #ifndef EIGEN_VECTORIZE_AVX512FP16 -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; - template<> EIGEN_STRONG_INLINE Packet16f pcast(const Packet16h& a) { return half2float(a); } -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; - template<> EIGEN_STRONG_INLINE Packet16h pcast(const Packet16f& a) { return float2half(a); } #endif -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; - template<> EIGEN_STRONG_INLINE Packet16f pcast(const Packet16bf& a) { return Bf16ToF32(a); } -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; - template<> EIGEN_STRONG_INLINE Packet16bf pcast(const Packet16f& a) { return F32ToBf16(a); } #ifdef EIGEN_VECTORIZE_AVX512FP16 -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 2 - }; -}; - -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 2, - TgtCoeffRatio = 1 - }; -}; - template<> EIGEN_STRONG_INLINE Packet16h preinterpret(const Packet32h& a) { return _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(a), 0)); } @@ -257,7 +219,7 @@ EIGEN_STRONG_INLINE Packet8h pcast(const Packet4f& a, const __m256 result = _mm256_undefined_ps(); result = _mm256_insertf128_ps(result, a, 0); result = _mm256_insertf128_ps(result, b, 1); - return _mm256_cvtps_ph(result, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC); + return _mm256_cvtps_ph(result, 
_MM_FROUND_TO_NEAREST_INT); } diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h index 0b5aa1c78..bb2817030 100644 --- a/Eigen/src/Core/arch/SSE/TypeCasting.h +++ b/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -17,61 +17,19 @@ namespace Eigen { namespace internal { #ifndef EIGEN_VECTORIZE_AVX -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 4, - TgtCoeffRatio = 1 - }; -}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 2 - }; -}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; + +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; + +template<> struct type_casting_traits : vectorized_type_casting_traits {}; +template<> struct type_casting_traits : vectorized_type_casting_traits {}; #endif -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; - -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; -}; - -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 2, - TgtCoeffRatio = 1 - }; -}; - -template <> -struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 2, - TgtCoeffRatio = 1 - }; -}; - template <> EIGEN_STRONG_INLINE Packet16b pcast(const Packet4f& a, const Packet4f& b, @@ -88,10 +46,31 @@ EIGEN_STRONG_INLINE Packet16b pcast(const Packet4f& a, return _mm_and_si128(merged, _mm_set1_epi8(1)); } +template <> +EIGEN_STRONG_INLINE Packet4f pcast(const Packet16b& a) { + const __m128 
cst_one = _mm_set_ps1(1.0f); + #ifdef EIGEN_VECTORIZE_SSE4_1 + __m128i a_extended = _mm_cvtepi8_epi32(a); + __m128i abcd = _mm_cmpeq_epi32(a_extended, _mm_setzero_si128()); + #else + __m128i abcd_efhg_ijkl_mnop = _mm_cmpeq_epi8(a, _mm_setzero_si128()); + __m128i aabb_ccdd_eeff_gghh = _mm_unpacklo_epi8(abcd_efhg_ijkl_mnop, abcd_efhg_ijkl_mnop); + __m128i abcd = _mm_unpacklo_epi8(aabb_ccdd_eeff_gghh, aabb_ccdd_eeff_gghh); + #endif + __m128 result = _mm_andnot_ps(_mm_castsi128_ps(abcd), cst_one); + return result; +} + template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { return _mm_cvttps_epi32(a); } +template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet2d& a, const Packet2d& b) { + return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm_cvttpd_epi32(a)), + _mm_castsi128_ps(_mm_cvttpd_epi32(b)), + (1 << 2) | (1 << 6))); +} + template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { return _mm_cvtepi32_ps(a); } @@ -100,10 +79,9 @@ template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet2d return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); } -template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet2d& a, const Packet2d& b) { - return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm_cvttpd_epi32(a)), - _mm_castsi128_ps(_mm_cvttpd_epi32(b)), - (1 << 2) | (1 << 6))); +template<> EIGEN_STRONG_INLINE Packet2d pcast(const Packet4i& a) { + // Simply discard the second half of the input + return _mm_cvtepi32_pd(a); } template<> EIGEN_STRONG_INLINE Packet2d pcast(const Packet4f& a) { diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp index d06fa2c70..49e667252 100644 --- a/test/array_cwise.cpp +++ b/test/array_cwise.cpp @@ -1211,6 +1211,22 @@ void typed_logicals_test(const ArrayType& m) { typed_logicals_test_impl::run(m); } +// print non-mangled typenames +template std::string printTypeInfo(const T&) { return typeid(T).name(); } +template<> std::string printTypeInfo(const int8_t&) { return "int8_t"; } 
+template<> std::string printTypeInfo(const int16_t&) { return "int16_t"; } +template<> std::string printTypeInfo(const int32_t&) { return "int32_t"; } +template<> std::string printTypeInfo(const int64_t&) { return "int64_t"; } +template<> std::string printTypeInfo(const uint8_t&) { return "uint8_t"; } +template<> std::string printTypeInfo(const uint16_t&) { return "uint16_t"; } +template<> std::string printTypeInfo(const uint32_t&) { return "uint32_t"; } +template<> std::string printTypeInfo(const uint64_t&) { return "uint64_t"; } +template<> std::string printTypeInfo(const float&) { return "float"; } +template<> std::string printTypeInfo(const double&) { return "double"; } +//template<> std::string printTypeInfo(const long double&) { return "long double"; } +template<> std::string printTypeInfo(const half&) { return "half"; } +template<> std::string printTypeInfo(const bfloat16&) { return "bfloat16"; } + template struct cast_test_impl { using SrcArray = Array; @@ -1225,63 +1241,30 @@ struct cast_test_impl { static constexpr int DstPacketSize = internal::packet_traits::size; static constexpr int MaxPacketSize = internal::plain_enum_max(SrcPacketSize, DstPacketSize); - // print non-mangled typenames - template - static std::string printTypeInfo(const T&) { - if (internal::is_same::value) - return "bool"; - else if (internal::is_same::value) - return "int8_t"; - else if (internal::is_same::value) - return "int16_t"; - else if (internal::is_same::value) - return "int32_t"; - else if (internal::is_same::value) - return "int64_t"; - else if (internal::is_same::value) - return "uint8_t"; - else if (internal::is_same::value) - return "uint16_t"; - else if (internal::is_same::value) - return "uint32_t"; - else if (internal::is_same::value) - return "uint64_t"; - else if (internal::is_same::value) - return "float"; - else if (internal::is_same::value) - return "double"; - //else if (internal::is_same::value) - // return "long double"; - else if (internal::is_same::value) - 
return "half"; - else if (internal::is_same::value) - return "bfloat16"; - else - return typeid(T).name(); - } - static void run() { const Index testRows = RowsAtCompileTime == Dynamic ? ((10 * MaxPacketSize) + 1) : RowsAtCompileTime; const Index testCols = ColsAtCompileTime == Dynamic ? ((10 * MaxPacketSize) + 1) : ColsAtCompileTime; const Index testSize = testRows * testCols; const Index minTestSize = 100; const Index repeats = numext::div_ceil(minTestSize, testSize); + SrcArray src(testRows, testCols); DstArray dst(testRows, testCols); + for (Index repeat = 0; repeat < repeats; repeat++) { src = src.unaryExpr(RandomOp()); dst = src.template cast(); - for (Index i = 0; i < testRows; i++) - for (Index j = 0; j < testCols; j++) { - DstType ref = internal::cast_impl::run(src(i, j)); - bool all_nan = ((numext::isnan)(src(i, j)) && (numext::isnan)(ref) && (numext::isnan)(dst(i, j))); - bool is_equal = ref == dst(i, j); - bool pass = all_nan || is_equal; - if (!pass) { - std::cout << printTypeInfo(SrcType()) << ": [" << +src(i, j) << "] to " << printTypeInfo(DstType()) << ": [" - << +dst(i, j) << "] != [" << +ref << "]\n"; - } - VERIFY(pass); + + for (Index j = 0; j < testCols; j++) + for (Index i = 0; i < testRows; i++) { + SrcType srcVal = src(i, j); + DstType refVal = internal::cast_impl::run(srcVal); + DstType dstVal = dst(i, j); + bool isApprox = verifyIsApprox(dstVal, refVal); + if (!isApprox) + std::cout << printTypeInfo(srcVal) << ": [" << +srcVal << "] to " << printTypeInfo(dstVal) << ": [" + << +dstVal << "] != [" << +refVal << "]\n"; + VERIFY(isApprox); } } }