Add more missing vectorized casts for int on x86, and remove redundant unit tests
commit b8b8a26145
parent 33e206f714
@@ -64,7 +64,6 @@ struct type_casting_traits<float, bool> {
 };
 #endif // EIGEN_VECTORIZE_AVX512
 
-
 template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
   return _mm256_cvttps_epi32(a);
 }
@@ -77,6 +76,10 @@ template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet4d, Packet8f>(const Packet4d
   return _mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a));
 }
 
+template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet4d, Packet8i>(const Packet4d& a, const Packet4d& b) {
+  return _mm256_set_m128i(_mm256_cvtpd_epi32(b), _mm256_cvtpd_epi32(a));
+}
+
 template <>
 EIGEN_STRONG_INLINE Packet16b pcast<Packet8f, Packet16b>(const Packet8f& a,
                                                          const Packet8f& b) {
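Note (not part of the patch): a minimal standalone sketch of the Packet4d -> Packet8i pattern added above, assuming an AVX-capable compiler (e.g. g++ -mavx). Two packets of four doubles are converted and concatenated into one packet of eight ints; _mm256_set_m128i(hi, lo) puts the result for 'a' in the low 128 bits and 'b' in the high 128 bits, as in the new overload. _mm256_cvtpd_epi32 rounds under the current rounding mode, while the truncating variant _mm256_cvttpd_epi32 matches scalar static_cast<int> semantics; the values below are whole numbers, so either behaves identically.

// Standalone sketch, not Eigen code.
#include <immintrin.h>
#include <cstdio>

int main() {
  __m256d a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);  // lanes 3..0
  __m256d b = _mm256_set_pd(8.0, 7.0, 6.0, 5.0);
  // Same pattern as the patch: low 128-bit half from 'a', high half from 'b'.
  __m256i r = _mm256_set_m128i(_mm256_cvtpd_epi32(b), _mm256_cvtpd_epi32(a));
  alignas(32) int out[8];
  _mm256_store_si256(reinterpret_cast<__m256i*>(out), r);
  for (int i = 0; i < 8; ++i) std::printf("%d ", out[i]);  // prints: 1 2 3 4 5 6 7 8
  std::printf("\n");
  return 0;
}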
@@ -544,6 +544,8 @@ EIGEN_STRONG_INLINE Packet8d pmax<PropagateNaN, Packet8d>(const Packet8d& a, con
 template<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) { return _mm512_extractf32x8_ps(x,I_); }
 template<int I_> EIGEN_STRONG_INLINE Packet2d extract128(Packet8d x) { return _mm512_extractf64x2_pd(x,I_); }
 EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) { return _mm512_insertf32x8(_mm512_castps256_ps512(a),b,1); }
+EIGEN_STRONG_INLINE Packet16i cat256i(Packet8i a, Packet8i b) {
+  return _mm512_inserti32x8(_mm512_castsi256_si512(a),b,1); }
 #else
 // AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512
 template<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) {
@@ -559,6 +561,9 @@ EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) {
   return _mm512_castsi512_ps(_mm512_inserti64x4(_mm512_castsi256_si512(_mm256_castps_si256(a)),
                                                 _mm256_castps_si256(b),1));
 }
+EIGEN_STRONG_INLINE Packet16i cat256i(Packet8i a, Packet8i b) {
+  return _mm512_inserti64x4(_mm512_castsi256_si512(a), b, 1);
+}
 #endif
 
 // Helper function for bit packing snippet of low precision comparison.
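Note (not part of the patch): a standalone sketch of what the new cat256i helpers do, assuming an AVX-512F toolchain (g++ -mavx512f). Packet8i corresponds to __m256i and Packet16i to __m512i; inserting at position 1 places 'a' in the low 256 bits and 'b' in the high 256 bits. The AVX512DQ branch uses a 32x8 insert and the fallback a 64x4 insert, but the resulting lane layout is the same; the sketch uses the AVX512F-only fallback form.

// Standalone sketch, not Eigen code.
#include <immintrin.h>
#include <cstdio>

int main() {
  __m256i a = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);       // lanes 7..0
  __m256i b = _mm256_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
  // 'a' fills the low 256 bits, 'b' the high 256 bits of the 512-bit result.
  __m512i r = _mm512_inserti64x4(_mm512_castsi256_si512(a), b, 1);
  alignas(64) int out[16];
  _mm512_store_si512(out, r);
  for (int i = 0; i < 16; ++i) std::printf("%d ", out[i]);    // prints: 0 1 2 ... 15
  std::printf("\n");
  return 0;
}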
@@ -55,6 +55,10 @@ template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet8d, Packet16f>(const Packet
   return cat256(_mm512_cvtpd_ps(a), _mm512_cvtpd_ps(b));
 }
 
+template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet8d, Packet16i>(const Packet8d& a, const Packet8d& b) {
+  return cat256i(_mm512_cvtpd_epi32(a), _mm512_cvtpd_epi32(b));
+}
+
 template<> EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i, Packet16f>(const Packet16f& a) {
   return _mm512_castps_si512(a);
 }
@@ -27,13 +27,14 @@ struct type_casting_traits<float, bool> {
 };
 
 template <>
-struct type_casting_traits<float, int> {
+struct type_casting_traits<float, double> {
   enum {
     VectorizedCast = 1,
     SrcCoeffRatio = 1,
-    TgtCoeffRatio = 1
+    TgtCoeffRatio = 2
   };
 };
+#endif
 
 template <>
 struct type_casting_traits<int, float> {
@@ -45,14 +46,22 @@ struct type_casting_traits<int, float> {
 };
 
 template <>
-struct type_casting_traits<float, double> {
+struct type_casting_traits<float, int> {
   enum {
     VectorizedCast = 1,
     SrcCoeffRatio = 1,
-    TgtCoeffRatio = 2
+    TgtCoeffRatio = 1
+  };
+};
+
+template <>
+struct type_casting_traits<double, int> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 2,
+    TgtCoeffRatio = 1
   };
 };
-#endif
 
 template <>
 struct type_casting_traits<double, float> {
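Note (not part of the patch): my reading of the ratio fields in the new type_casting_traits<double, int> specialization, stated as an assumption rather than a quote from Eigen's documentation: SrcCoeffRatio = 2 with TgtCoeffRatio = 1 means one vectorized cast consumes two source packets per target packet, which is why the pcast<Packet2d, Packet4i> overload in the next hunk takes two Packet2d arguments. A trivial compile-time sanity check of that arithmetic:

// Standalone sketch; the packet widths restate SSE sizes and are my assumption.
#include <cstddef>

constexpr std::size_t kDoublesPerPacket2d = 2;  // __m128d holds 2 doubles
constexpr std::size_t kIntsPerPacket4i    = 4;  // __m128i holds 4 ints
constexpr std::size_t kSrcCoeffRatio      = 2;  // from the new trait
constexpr std::size_t kTgtCoeffRatio      = 1;

static_assert(kSrcCoeffRatio * kDoublesPerPacket2d == kTgtCoeffRatio * kIntsPerPacket4i,
              "two Packet2d inputs provide exactly the 4 scalars of one Packet4i");

int main() { return 0; }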
@@ -91,6 +100,12 @@ template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d
   return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
 }
 
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) {
+  return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm_cvtpd_epi32(a)),
+                                         _mm_castsi128_ps(_mm_cvtpd_epi32(b)),
+                                         (1 << 2) | (1 << 6)));
+}
+
 template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
   // Simply discard the second half of the input
   return _mm_cvtps_pd(a);
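Note (not part of the patch): a standalone SSE2 sketch of the lane-packing trick used by the new pcast<Packet2d, Packet4i>. _mm_cvtpd_epi32 leaves its two converted ints in the low half of a 128-bit register, and the shuffle immediate (1 << 2) | (1 << 6) == 0x44 selects lanes {a0, a1, b0, b1}, so two double packets exactly fill one int packet.

// Standalone sketch, not Eigen code. Compiles with plain SSE2.
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128d a = _mm_set_pd(2.0, 1.0);  // lanes: a0 = 1.0, a1 = 2.0
  __m128d b = _mm_set_pd(4.0, 3.0);  // lanes: b0 = 3.0, b1 = 4.0
  __m128i r = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm_cvtpd_epi32(a)),
                                              _mm_castsi128_ps(_mm_cvtpd_epi32(b)),
                                              (1 << 2) | (1 << 6)));
  alignas(16) int out[4];
  _mm_store_si128(reinterpret_cast<__m128i*>(out), r);
  std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  // prints: 1 2 3 4
  return 0;
}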
@@ -15,113 +15,23 @@
 using Eigen::Tensor;
 using Eigen::array;
 
-static void test_simple_cast()
-{
-  Tensor<float, 2> ftensor(20,30);
-  ftensor = ftensor.random() * 100.f;
-  Tensor<char, 2> chartensor(20,30);
-  chartensor.setRandom();
-  Tensor<std::complex<float>, 2> cplextensor(20,30);
-  cplextensor.setRandom();
-
-  chartensor = ftensor.cast<char>();
-  cplextensor = ftensor.cast<std::complex<float> >();
-
-  for (int i = 0; i < 20; ++i) {
-    for (int j = 0; j < 30; ++j) {
-      VERIFY_IS_EQUAL(chartensor(i,j), static_cast<char>(ftensor(i,j)));
-      VERIFY_IS_EQUAL(cplextensor(i,j), static_cast<std::complex<float> >(ftensor(i,j)));
-    }
-  }
-}
-
-
-static void test_vectorized_cast()
-{
-  Tensor<int, 2> itensor(20,30);
-  itensor = itensor.random() / 1000;
-  Tensor<float, 2> ftensor(20,30);
-  ftensor.setRandom();
-  Tensor<double, 2> dtensor(20,30);
-  dtensor.setRandom();
-
-  ftensor = itensor.cast<float>();
-  dtensor = itensor.cast<double>();
-
-  for (int i = 0; i < 20; ++i) {
-    for (int j = 0; j < 30; ++j) {
-      VERIFY_IS_EQUAL(itensor(i,j), static_cast<int>(ftensor(i,j)));
-      VERIFY_IS_EQUAL(dtensor(i,j), static_cast<double>(ftensor(i,j)));
-    }
-  }
-}
-
-
-static void test_float_to_int_cast()
-{
-  Tensor<float, 2> ftensor(20,30);
-  ftensor = ftensor.random() * 1000.0f;
-  Tensor<double, 2> dtensor(20,30);
-  dtensor = dtensor.random() * 1000.0;
-
-  Tensor<int, 2> i1tensor = ftensor.cast<int>();
-  Tensor<int, 2> i2tensor = dtensor.cast<int>();
-
-  for (int i = 0; i < 20; ++i) {
-    for (int j = 0; j < 30; ++j) {
-      VERIFY_IS_EQUAL(i1tensor(i,j), static_cast<int>(ftensor(i,j)));
-      VERIFY_IS_EQUAL(i2tensor(i,j), static_cast<int>(dtensor(i,j)));
-    }
-  }
-}
-
-
-static void test_big_to_small_type_cast()
-{
-  Tensor<double, 2> dtensor(20, 30);
-  dtensor.setRandom();
-  Tensor<float, 2> ftensor(20, 30);
-  ftensor = dtensor.cast<float>();
-
-  for (int i = 0; i < 20; ++i) {
-    for (int j = 0; j < 30; ++j) {
-      VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
-    }
-  }
-}
-
-
-static void test_small_to_big_type_cast()
-{
-  Tensor<float, 2> ftensor(20, 30);
-  ftensor.setRandom();
-  Tensor<double, 2> dtensor(20, 30);
-  dtensor = ftensor.cast<double>();
-
-  for (int i = 0; i < 20; ++i) {
-    for (int j = 0; j < 30; ++j) {
-      VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
-    }
-  }
-}
-
 template <typename FromType, typename ToType>
 static void test_type_cast() {
-  Tensor<FromType, 2> ftensor(100, 200);
+  Tensor<FromType, 2> ftensor(101, 201);
   // Generate random values for a valid cast.
-  for (int i = 0; i < 100; ++i) {
-    for (int j = 0; j < 200; ++j) {
+  for (int i = 0; i < 101; ++i) {
+    for (int j = 0; j < 201; ++j) {
       ftensor(i, j) = internal::random_without_cast_overflow<FromType,ToType>::value();
     }
   }
 
-  Tensor<ToType, 2> ttensor(100, 200);
+  Tensor<ToType, 2> ttensor(101, 201);
   ttensor = ftensor.template cast<ToType>();
 
-  for (int i = 0; i < 100; ++i) {
-    for (int j = 0; j < 200; ++j) {
+  for (int i = 0; i < 101; ++i) {
+    for (int j = 0; j < 201; ++j) {
-      const ToType ref = internal::cast<FromType,ToType>(ftensor(i, j));
-      VERIFY_IS_APPROX(ttensor(i, j), ref);
+      const ToType ref = static_cast<ToType>(ftensor(i, j));
+      VERIFY_IS_EQUAL(ttensor(i, j), ref);
     }
   }
 }
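Note (not part of the patch): the reference value in test_type_cast now comes from plain static_cast and is checked with exact equality instead of VERIFY_IS_APPROX, so the vectorized pcast paths added in the hunks above have to reproduce scalar C++ conversion semantics exactly. A scalar reminder of what that means for floating point to int (truncation toward zero):

// Standalone sketch, not Eigen code.
#include <cassert>

int main() {
  assert(static_cast<int>(3.9f) == 3);   // scalar casts truncate, they do not round
  assert(static_cast<int>(-3.9) == -3);  // truncation toward zero for doubles too
  return 0;
}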
@@ -161,12 +71,6 @@ struct test_cast_runner<Scalar, std::enable_if_t<NumTraits<Scalar>::IsComplex>>
 
 EIGEN_DECLARE_TEST(cxx11_tensor_casts)
 {
-  CALL_SUBTEST(test_simple_cast());
-  CALL_SUBTEST(test_vectorized_cast());
-  CALL_SUBTEST(test_float_to_int_cast());
-  CALL_SUBTEST(test_big_to_small_type_cast());
-  CALL_SUBTEST(test_small_to_big_type_cast());
-
   CALL_SUBTEST(test_cast_runner<bool>::run());
   CALL_SUBTEST(test_cast_runner<int8_t>::run());
   CALL_SUBTEST(test_cast_runner<int16_t>::run());
Loading…
x
Reference in New Issue
Block a user