mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 17:49:36 +08:00
Re-enable Arm Neon Eigen::half packets of size 8
- Add predux_half_dowto4
- Remove explicit casts in Half.h to match the behaviour of BFloat16.h
- Enable more packetmath tests for Eigen::half
This commit is contained in:
parent
17268b155d
commit
11e4056f6b
@ -159,6 +159,10 @@ struct half : public half_impl::half_base {
|
||||
explicit EIGEN_DEVICE_FUNC half(std::complex<RealScalar> c)
|
||||
: half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(c.real()))) {}
|
||||
|
||||
// Implicit conversion to float. Kept implicit on purpose (hence the NOLINT):
// the conversion is lossless.
EIGEN_DEVICE_FUNC operator float() const {  // NOLINT: Allow implicit conversion to float, because it is lossless.
  const float as_float = half_impl::half_to_float(*this);
  return as_float;
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
|
||||
// +0.0 and -0.0 become false, everything else becomes true.
|
||||
#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
|
||||
@ -167,47 +171,6 @@ struct half : public half_impl::half_base {
|
||||
return (x & 0x7fff) != 0;
|
||||
#endif
|
||||
}
|
||||
// Explicit cast to signed char: convert this half to float, then cast the
// float to the target type.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
  const float as_float = half_impl::half_to_float(*this);
  return static_cast<signed char>(as_float);
}
|
||||
// Explicit cast to unsigned char: convert this half to float, then cast the
// float to the target type.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
  const float as_float = half_impl::half_to_float(*this);
  return static_cast<unsigned char>(as_float);
}
|
||||
// Explicit cast to short: convert this half to float, then cast the float to
// the target type.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
  const float as_float = half_impl::half_to_float(*this);
  return static_cast<short>(as_float);
}
|
||||
// Explicit cast to numext::uint16_t. This converts the numeric value (via
// float); it does not return the raw bit pattern.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(numext::uint16_t) const {
  const float as_float = half_impl::half_to_float(*this);
  return static_cast<numext::uint16_t>(as_float);
}
|
||||
// Explicit cast to int: convert this half to float, then cast the float to
// the target type.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
  const float as_float = half_impl::half_to_float(*this);
  return static_cast<int>(as_float);
}
|
||||
// Explicit cast to unsigned int: convert this half to float, then cast the
// float to the target type.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
  const float as_float = half_impl::half_to_float(*this);
  return static_cast<unsigned int>(as_float);
}
|
||||
// Explicit cast to long: convert this half to float, then cast the float to
// the target type.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
  const float as_float = half_impl::half_to_float(*this);
  return static_cast<long>(as_float);
}
|
||||
// Explicit cast to unsigned long: convert this half to float, then cast the
// float to the target type.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
  const float as_float = half_impl::half_to_float(*this);
  return static_cast<unsigned long>(as_float);
}
|
||||
// Explicit cast to long long: convert this half to float, then cast the float
// to the target type.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
  const float as_float = half_impl::half_to_float(*this);
  return static_cast<long long>(as_float);
}
|
||||
// Explicit cast to unsigned long long: convert this half to float, then cast
// the float to the target type.
// The helper is spelled half_impl::half_to_float, like every other cast
// operator in this struct, instead of relying on argument-dependent lookup to
// find the unqualified name.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
  return static_cast<unsigned long long>(half_impl::half_to_float(*this));
}
|
||||
// Explicit cast to float: returns the result of half_impl::half_to_float.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
  const float as_float = half_impl::half_to_float(*this);
  return as_float;
}
|
||||
// Explicit cast to double: convert this half to float, then widen to double.
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
  const float as_float = half_impl::half_to_float(*this);
  return static_cast<double>(as_float);
}
|
||||
|
||||
template<typename RealScalar>
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(std::complex<RealScalar>) const {
|
||||
return std::complex<RealScalar>(static_cast<RealScalar>(*this), RealScalar(0));
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
@ -686,6 +649,12 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) {
|
||||
// tanh for half: evaluated in single precision with ::tanhf, then converted
// back to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
  const float arg = static_cast<float>(a);
  const float result = ::tanhf(arg);
  return half(result);
}
|
||||
// asin for half: evaluated in single precision with ::asinf, then converted
// back to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half asin(const half& a) {
  const float arg = static_cast<float>(a);
  const float result = ::asinf(arg);
  return half(result);
}
|
||||
// acos for half: evaluated in single precision with ::acosf, then converted
// back to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half acos(const half& a) {
  const float arg = static_cast<float>(a);
  const float result = ::acosf(arg);
  return half(result);
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
|
||||
#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
|
||||
defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
@ -694,6 +663,9 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
|
||||
return half(::floorf(float(a)));
|
||||
#endif
|
||||
}
|
||||
// rint for half: evaluated in single precision with ::rintf, then converted
// back to half.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half rint(const half& a) {
  const float arg = static_cast<float>(a);
  const float result = ::rintf(arg);
  return half(result);
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
|
||||
#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
|
||||
defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
|
@ -3849,16 +3849,15 @@ template<> EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& _x){ return vsqrtq
|
||||
// Half-precision packet type backed by the NEON float16x4_t vector type.
typedef float16x4_t Packet4hf;
|
||||
// Half-precision packet type backed by the NEON float16x8_t vector type.
typedef float16x8_t Packet8hf;
|
||||
|
||||
// TODO(tellenbach): Enable packets of size 8 as soon as the GEBP can handle them
|
||||
template <>
|
||||
struct packet_traits<Eigen::half> : default_packet_traits {
|
||||
typedef Packet4hf type;
|
||||
typedef Packet8hf type;
|
||||
typedef Packet4hf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
HasHalfPacket = 0,
|
||||
size = 8,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasCmp = 1,
|
||||
HasCast = 1,
|
||||
@ -3904,7 +3903,7 @@ struct unpacket_traits<Packet4hf> {
|
||||
template <>
|
||||
struct unpacket_traits<Packet8hf> {
|
||||
typedef Eigen::half type;
|
||||
typedef Packet8hf half;
|
||||
typedef Packet4hf half;
|
||||
enum {
|
||||
size = 8,
|
||||
alignment = Aligned16,
|
||||
@ -3914,6 +3913,11 @@ struct unpacket_traits<Packet8hf> {
|
||||
};
|
||||
};
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC Packet4hf predux_half_dowto4<Packet8hf>(const Packet8hf& a) {
|
||||
return vadd_f16(vget_low_f16(a), vget_high_f16(a));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8hf pset1<Packet8hf>(const Eigen::half& from) {
|
||||
return vdupq_n_f16(from.x);
|
||||
@ -4418,7 +4422,8 @@ EIGEN_STRONG_INLINE Eigen::half predux_max<Packet4hf>(const Packet4hf& a) {
|
||||
return h;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8hf, 4>& kernel) {
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8hf, 4>& kernel)
|
||||
{
|
||||
EIGEN_ALIGN16 Eigen::half in[4][8];
|
||||
|
||||
pstore<Eigen::half>(in[0], kernel.packet[0]);
|
||||
@ -4432,11 +4437,11 @@ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8hf, 4>& kernel) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
out[i][j] = in[j][2*i];
|
||||
out[i][j] = in[j][2 * i];
|
||||
}
|
||||
EIGEN_UNROLL_LOOP
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
out[i][j+4] = in[j][2*i+1];
|
||||
out[i][j + 4] = in[j][2 * i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1073,7 +1073,7 @@ EIGEN_DECLARE_TEST(packetmath) {
|
||||
CALL_SUBTEST_10(test::runner<uint64_t>::run());
|
||||
CALL_SUBTEST_11(test::runner<std::complex<float> >::run());
|
||||
CALL_SUBTEST_12(test::runner<std::complex<double> >::run());
|
||||
CALL_SUBTEST_13((packetmath<half, internal::packet_traits<half>::type>()));
|
||||
CALL_SUBTEST_13(test::runner<half>::run());
|
||||
CALL_SUBTEST_14((packetmath<bool, internal::packet_traits<bool>::type>()));
|
||||
CALL_SUBTEST_15(test::runner<bfloat16>::run());
|
||||
g_first_pass = false;
|
||||
|
Loading…
x
Reference in New Issue
Block a user