Avoid promotion of Arm __fp16 to float in Neon PacketMath

Using the overloaded arithmetic operators on Arm __fp16 values always
promotes the operands to float. We replace operator* with the vmulh_f16
intrinsic in the predux_mul reductions to avoid this promotion.
This commit is contained in:
David Tellenbach 2020-11-17 20:19:44 +01:00
parent 117a4c0617
commit e9b55c4db8

View File

@@ -4355,7 +4355,7 @@ EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8hf>(const Packet8hf& a) {
prod = vmul_f16(prod, vrev64_f16(prod));
Eigen::half h;
- h.x = vget_lane_f16(prod, 0) * vget_lane_f16(prod, 1);
+ h.x = vmulh_f16(vget_lane_f16(prod, 0), vget_lane_f16(prod, 1));
return h;
}
@@ -4364,7 +4364,7 @@ EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet4hf>(const Packet4hf& a) {
float16x4_t prod;
prod = vmul_f16(a, vrev64_f16(a));
Eigen::half h;
- h.x = vget_lane_f16(prod, 0) * vget_lane_f16(prod, 1);
+ h.x = vmulh_f16(vget_lane_f16(prod, 0), vget_lane_f16(prod, 1));
return h;
}