Add more missing vectorized casts for int on x86, and remove redundant unit tests

Rasmus Munk Larsen 2023-03-24 16:02:00 +00:00
parent 33e206f714
commit b8b8a26145
5 changed files with 41 additions and 110 deletions
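
For context, the user-visible effect: double-to-int casts in Eigen expressions previously fell back to scalar code on x86 because the corresponding pcast specializations were missing. A minimal sketch of the kind of code that now takes the vectorized path (sizes are illustrative, not from the commit):

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      // With this commit, the cast below can use pcast<Packet2d,Packet4i>
      // (SSE), pcast<Packet4d,Packet8i> (AVX) or pcast<Packet8d,Packet16i>
      // (AVX512) instead of one static_cast per element.
      Eigen::ArrayXd d = Eigen::ArrayXd::Random(101) * 1000.0;
      Eigen::ArrayXi i = d.cast<int>();  // truncates toward zero, like static_cast
      std::cout << i.sum() << "\n";
      return 0;
    }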

View File

@@ -64,7 +64,6 @@ struct type_casting_traits<float, bool> {
 };
 #endif  // EIGEN_VECTORIZE_AVX512
 
 template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
   return _mm256_cvttps_epi32(a);
 }
@@ -77,6 +76,10 @@ template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet4d, Packet8f>(const Packet4d
   return _mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a));
 }
 
+template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet4d, Packet8i>(const Packet4d& a, const Packet4d& b) {
+  return _mm256_set_m128i(_mm256_cvttpd_epi32(b), _mm256_cvttpd_epi32(a));
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet16b pcast<Packet8f, Packet16b>(const Packet8f& a,
                                                          const Packet8f& b) {
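
The new AVX specialization above consumes two Packet4d inputs to fill one Packet8i, which is why type_casting_traits<double, int> (added in the SSE file below) declares SrcCoeffRatio = 2. A hedged scalar model of its semantics (hypothetical helper name; truncation assumed, matching _mm256_cvttpd_epi32):

    // Two packets of 4 doubles in, one packet of 8 ints out.
    // _mm256_set_m128i(hi, lo) takes the high half first, which is why
    // b comes before a in the intrinsic call above.
    void pcast_4d_to_8i_model(const double a[4], const double b[4], int out[8]) {
      for (int k = 0; k < 4; ++k) out[k]     = static_cast<int>(a[k]);  // truncate
      for (int k = 0; k < 4; ++k) out[4 + k] = static_cast<int>(b[k]);
    }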

View File

@@ -544,6 +544,8 @@ EIGEN_STRONG_INLINE Packet8d pmax<PropagateNaN, Packet8d>(const Packet8d& a, con
 template<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) { return _mm512_extractf32x8_ps(x,I_); }
 template<int I_> EIGEN_STRONG_INLINE Packet2d extract128(Packet8d x) { return _mm512_extractf64x2_pd(x,I_); }
 EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) { return _mm512_insertf32x8(_mm512_castps256_ps512(a),b,1); }
+EIGEN_STRONG_INLINE Packet16i cat256i(Packet8i a, Packet8i b) {
+  return _mm512_inserti32x8(_mm512_castsi256_si512(a),b,1); }
 #else
 // AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512
 template<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) {
@@ -559,6 +561,9 @@ EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) {
   return _mm512_castsi512_ps(_mm512_inserti64x4(_mm512_castsi256_si512(_mm256_castps_si256(a)),
                                                 _mm256_castps_si256(b),1));
 }
+EIGEN_STRONG_INLINE Packet16i cat256i(Packet8i a, Packet8i b) {
+  return _mm512_inserti64x4(_mm512_castsi256_si512(a), b, 1);
+}
 #endif
 
 // Helper function for bit packing snippet of low precision comparison.
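
cat256i is the integer analogue of cat256: it concatenates two 256-bit integer packets into one 512-bit packet. AVX512DQ provides a direct 32-bit-element insert (_mm512_inserti32x8); plain AVX512F only has the 64-bit-element insert, but a pure concatenation just moves bits, so inserting as four 64-bit lanes yields the identical result. A scalar model of what both branches compute (hypothetical helper name):

    #include <cstdint>
    // Concatenation is element-size agnostic: the same 512 bits result
    // whether they are viewed as 16 x int32 or 8 x int64.
    void cat256i_model(const int32_t a[8], const int32_t b[8], int32_t out[16]) {
      for (int k = 0; k < 8; ++k) out[k]     = a[k];  // low 256 bits
      for (int k = 0; k < 8; ++k) out[8 + k] = b[k];  // high 256 bits
    }

The helper is needed because _mm512_cvttpd_epi32 narrows 8 doubles (512 bits) down to 8 ints (256 bits), so the AVX512 double-to-int cast in the next file must glue two such halves back together.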

View File

@@ -55,6 +55,10 @@ template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet8d, Packet16f>(const Packet
   return cat256(_mm512_cvtpd_ps(a), _mm512_cvtpd_ps(b));
 }
 
+template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet8d, Packet16i>(const Packet8d& a, const Packet8d& b) {
+  return cat256i(_mm512_cvttpd_epi32(a), _mm512_cvttpd_epi32(b));
+}
+
 template<> EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i, Packet16f>(const Packet16f& a) {
   return _mm512_castps_si512(a);
 }

View File

@@ -27,13 +27,14 @@ struct type_casting_traits<float, bool> {
 };
 
 template <>
-struct type_casting_traits<float, int> {
+struct type_casting_traits<float, double> {
   enum {
     VectorizedCast = 1,
     SrcCoeffRatio = 1,
-    TgtCoeffRatio = 1
+    TgtCoeffRatio = 2
   };
 };
+#endif
 
 template <>
 struct type_casting_traits<int, float> {
@@ -45,14 +46,22 @@ struct type_casting_traits<int, float> {
 };
 
 template <>
-struct type_casting_traits<float, double> {
+struct type_casting_traits<float, int> {
   enum {
     VectorizedCast = 1,
     SrcCoeffRatio = 1,
-    TgtCoeffRatio = 2
+    TgtCoeffRatio = 1
+  };
+};
+
+template <>
+struct type_casting_traits<double, int> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 2,
+    TgtCoeffRatio = 1
   };
 };
-#endif
 
 template <>
 struct type_casting_traits<double, float> {
@@ -91,6 +100,12 @@ template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d
   return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
 }
 
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) {
+  return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm_cvttpd_epi32(a)),
+                                         _mm_castsi128_ps(_mm_cvttpd_epi32(b)),
+                                         (1 << 2) | (1 << 6)));
+}
+
 template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
   // Simply discard the second half of the input
   return _mm_cvtps_pd(a);
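
On SSE, _mm_cvttpd_epi32 converts 2 doubles into 2 ints that occupy only the low 64 bits of a 128-bit register, so two conversions are interleaved with a shuffle to fill all four Packet4i lanes; the immediate (1 << 2) | (1 << 6) = 0x44 selects lanes {0, 1} from each operand. A hedged scalar model of the lane bookkeeping (hypothetical helper name, not the intrinsic path):

    #include <cstdint>
    // Each cvttpd_epi32 yields {int(x0), int(x1), 0, 0}; the shuffle keeps
    // lanes 0,1 of the first operand and lanes 0,1 of the second.
    void pcast_2d_to_4i_model(const double a[2], const double b[2], int32_t out[4]) {
      out[0] = static_cast<int32_t>(a[0]);  // truncation, as in static_cast
      out[1] = static_cast<int32_t>(a[1]);
      out[2] = static_cast<int32_t>(b[0]);
      out[3] = static_cast<int32_t>(b[1]);
    }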

View File

@@ -15,113 +15,23 @@
 using Eigen::Tensor;
 using Eigen::array;
 
-static void test_simple_cast()
-{
-  Tensor<float, 2> ftensor(20,30);
-  ftensor = ftensor.random() * 100.f;
-  Tensor<char, 2> chartensor(20,30);
-  chartensor.setRandom();
-  Tensor<std::complex<float>, 2> cplextensor(20,30);
-  cplextensor.setRandom();
-
-  chartensor = ftensor.cast<char>();
-  cplextensor = ftensor.cast<std::complex<float> >();
-
-  for (int i = 0; i < 20; ++i) {
-    for (int j = 0; j < 30; ++j) {
-      VERIFY_IS_EQUAL(chartensor(i,j), static_cast<char>(ftensor(i,j)));
-      VERIFY_IS_EQUAL(cplextensor(i,j), static_cast<std::complex<float> >(ftensor(i,j)));
-    }
-  }
-}
-
-static void test_vectorized_cast()
-{
-  Tensor<int, 2> itensor(20,30);
-  itensor = itensor.random() / 1000;
-  Tensor<float, 2> ftensor(20,30);
-  ftensor.setRandom();
-  Tensor<double, 2> dtensor(20,30);
-  dtensor.setRandom();
-
-  ftensor = itensor.cast<float>();
-  dtensor = itensor.cast<double>();
-
-  for (int i = 0; i < 20; ++i) {
-    for (int j = 0; j < 30; ++j) {
-      VERIFY_IS_EQUAL(itensor(i,j), static_cast<int>(ftensor(i,j)));
-      VERIFY_IS_EQUAL(dtensor(i,j), static_cast<double>(ftensor(i,j)));
-    }
-  }
-}
-
-static void test_float_to_int_cast()
-{
-  Tensor<float, 2> ftensor(20,30);
-  ftensor = ftensor.random() * 1000.0f;
-  Tensor<double, 2> dtensor(20,30);
-  dtensor = dtensor.random() * 1000.0;
-
-  Tensor<int, 2> i1tensor = ftensor.cast<int>();
-  Tensor<int, 2> i2tensor = dtensor.cast<int>();
-
-  for (int i = 0; i < 20; ++i) {
-    for (int j = 0; j < 30; ++j) {
-      VERIFY_IS_EQUAL(i1tensor(i,j), static_cast<int>(ftensor(i,j)));
-      VERIFY_IS_EQUAL(i2tensor(i,j), static_cast<int>(dtensor(i,j)));
-    }
-  }
-}
-
-static void test_big_to_small_type_cast()
-{
-  Tensor<double, 2> dtensor(20, 30);
-  dtensor.setRandom();
-  Tensor<float, 2> ftensor(20, 30);
-  ftensor = dtensor.cast<float>();
-
-  for (int i = 0; i < 20; ++i) {
-    for (int j = 0; j < 30; ++j) {
-      VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
-    }
-  }
-}
-
-static void test_small_to_big_type_cast()
-{
-  Tensor<float, 2> ftensor(20, 30);
-  ftensor.setRandom();
-  Tensor<double, 2> dtensor(20, 30);
-  dtensor = ftensor.cast<double>();
-
-  for (int i = 0; i < 20; ++i) {
-    for (int j = 0; j < 30; ++j) {
-      VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
-    }
-  }
-}
-
 template <typename FromType, typename ToType>
 static void test_type_cast() {
-  Tensor<FromType, 2> ftensor(100, 200);
+  Tensor<FromType, 2> ftensor(101, 201);
   // Generate random values for a valid cast.
-  for (int i = 0; i < 100; ++i) {
-    for (int j = 0; j < 200; ++j) {
+  for (int i = 0; i < 101; ++i) {
+    for (int j = 0; j < 201; ++j) {
       ftensor(i, j) = internal::random_without_cast_overflow<FromType,ToType>::value();
     }
   }
 
-  Tensor<ToType, 2> ttensor(100, 200);
+  Tensor<ToType, 2> ttensor(101, 201);
   ttensor = ftensor.template cast<ToType>();
 
-  for (int i = 0; i < 100; ++i) {
-    for (int j = 0; j < 200; ++j) {
-      const ToType ref = internal::cast<FromType,ToType>(ftensor(i, j));
-      VERIFY_IS_APPROX(ttensor(i, j), ref);
+  for (int i = 0; i < 101; ++i) {
+    for (int j = 0; j < 201; ++j) {
+      const ToType ref = static_cast<ToType>(ftensor(i, j));
+      VERIFY_IS_EQUAL(ttensor(i, j), ref);
     }
   }
 }
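
The deleted hand-written tests are subsumed by this templated one, and two changes matter. First, VERIFY_IS_EQUAL against a static_cast reference demands bit-exact agreement with scalar casting, the right bar for a conversion. Second, growing 100x200 to 101x201 is not cosmetic: an odd element count leaves a remainder after the vectorized loop, so both the packet path and the scalar tail of the new pcast specializations get exercised. A quick check of that claim:

    #include <cassert>
    int main() {
      const int n = 101 * 201;  // 20301 elements
      assert(n % 4 != 0);       // SSE Packet4i: a scalar tail must run
      // n % 4 != 0 implies n % 8 != 0 and n % 16 != 0 (AVX, AVX512) as well.
      return 0;
    }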
@@ -161,12 +71,6 @@ struct test_cast_runner<Scalar, std::enable_if_t<NumTraits<Scalar>::IsComplex>>
 
 EIGEN_DECLARE_TEST(cxx11_tensor_casts)
 {
-  CALL_SUBTEST(test_simple_cast());
-  CALL_SUBTEST(test_vectorized_cast());
-  CALL_SUBTEST(test_float_to_int_cast());
-  CALL_SUBTEST(test_big_to_small_type_cast());
-  CALL_SUBTEST(test_small_to_big_type_cast());
-
   CALL_SUBTEST(test_cast_runner<bool>::run());
   CALL_SUBTEST(test_cast_runner<int8_t>::run());
   CALL_SUBTEST(test_cast_runner<int16_t>::run());