From 6bb6a6bf53e6728f520a80b2c5c38642243921cb Mon Sep 17 00:00:00 2001
From: Alex Druinsky
Date: Tue, 26 Oct 2021 12:24:33 -0700
Subject: [PATCH] Vectorize fp16 tanh and logistic functions on Neon

Activates vectorization of the Eigen::half versions of the tanh and
logistic functions when they run on Neon. Both functions convert their
inputs to float before computing the output, and as a result of this
commit, the conversions and the computation in float are vectorized.
---
 Eigen/Core                               |  5 ++
 Eigen/src/Core/arch/NEON/MathFunctions.h | 19 +++++++
 Eigen/src/Core/arch/NEON/PacketMath.h    |  1 +
 Eigen/src/Core/arch/NEON/UnaryFunctors.h | 64 ++++++++++++++++++++++++
 4 files changed, 89 insertions(+)
 create mode 100644 Eigen/src/Core/arch/NEON/UnaryFunctors.h

diff --git a/Eigen/Core b/Eigen/Core
index e0da49907..d6cc16260 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -263,6 +263,11 @@ using std::ptrdiff_t;
 #include "src/Core/arch/GPU/Complex.h"
 #endif
 
+// Specializations of vectorized activation functions for NEON.
+#ifdef EIGEN_VECTORIZE_NEON
+#include "src/Core/arch/NEON/UnaryFunctors.h"
+#endif
+
 #include "src/Core/util/IndexedViewHelper.h"
 #include "src/Core/util/ReshapedHelper.h"
 #include "src/Core/ArithmeticSequence.h"
diff --git a/Eigen/src/Core/arch/NEON/MathFunctions.h b/Eigen/src/Core/arch/NEON/MathFunctions.h
index d34882a67..0111cf364 100644
--- a/Eigen/src/Core/arch/NEON/MathFunctions.h
+++ b/Eigen/src/Core/arch/NEON/MathFunctions.h
@@ -40,6 +40,25 @@ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Pack
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f ptanh<Packet4f>(const Packet4f& x)
 { return internal::generic_fast_tanh_float(x); }
 
+#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED
+Packet4hf ptanh<Packet4hf>(const Packet4hf& x) {
+  // Widen the 4 halves to float, call ptanh<Packet4f>, and narrow the result back.
+  return vcvt_f16_f32(ptanh<Packet4f>(vcvt_f32_f16(x)));
+}
+
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED
+Packet8hf ptanh<Packet8hf>(const Packet8hf& x) {
+  // Widen each group of 4 halves to float, call ptanh<Packet4f>, narrow, and recombine.
+  return vcombine_f16(
+    vcvt_f16_f32(ptanh<Packet4f>(vcvt_f32_f16(vget_low_f16(x)))),
+    vcvt_f16_f32(ptanh<Packet4f>(vcvt_high_f32_f16(x))));
+}
+#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+
+
 BF16_PACKET_FUNCTION(Packet4f, Packet4bf, psin)
 BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pcos)
 BF16_PACKET_FUNCTION(Packet4f, Packet4bf, plog)
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 382a2c8f9..e908bf54f 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -4028,6 +4028,7 @@ struct packet_traits<Eigen::half> : default_packet_traits {
     HasCos = 0,
     HasLog = 0,
     HasExp = 0,
+    HasTanh = packet_traits<float>::HasTanh,  // tanh<half> calls tanh<float>
     HasSqrt = 1,
     HasRsqrt = 1,
     HasErf = EIGEN_FAST_MATH,
diff --git a/Eigen/src/Core/arch/NEON/UnaryFunctors.h b/Eigen/src/Core/arch/NEON/UnaryFunctors.h
new file mode 100644
index 000000000..131746dbd
--- /dev/null
+++ b/Eigen/src/Core/arch/NEON/UnaryFunctors.h
@@ -0,0 +1,64 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NEON_UNARY_FUNCTORS_H
+#define EIGEN_NEON_UNARY_FUNCTORS_H
+
+#include "../../InternalHeaderCheck.h"
+
+namespace Eigen {
+
+namespace internal {
+
+#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+/** \internal
+  * \brief Template specialization of the logistic function for Eigen::half.
+  */
+template <>
+struct scalar_logistic_op<Eigen::half> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Eigen::half operator()(const Eigen::half& x) const {
+    // Scalar path: convert to float, call scalar_logistic_op<float>, convert back.
+    const scalar_logistic_op<float> float_op;
+    return Eigen::half(float_op(float(x)));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Eigen::half packetOp(const Eigen::half& x) const {
+    return this->operator()(x);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Packet4hf packetOp(const Packet4hf& x) const {
+    const scalar_logistic_op<float> float_op;
+    return vcvt_f16_f32(float_op.packetOp(vcvt_f32_f16(x)));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Packet8hf packetOp(const Packet8hf& x) const {
+    const scalar_logistic_op<float> float_op;
+    return vcombine_f16(
+      vcvt_f16_f32(float_op.packetOp(vcvt_f32_f16(vget_low_f16(x)))),
+      vcvt_f16_f32(float_op.packetOp(vcvt_high_f32_f16(x))));
+  }
+};
+
+template<>
+struct functor_traits<scalar_logistic_op<Eigen::half>> {
+  enum {
+    Cost = functor_traits<scalar_logistic_op<float>>::Cost,
+    PacketAccess = functor_traits<scalar_logistic_op<float>>::PacketAccess,
+  };
+};
+#endif  // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+
+}  // end namespace internal
+
+}  // end namespace Eigen
+
+#endif  // EIGEN_NEON_UNARY_FUNCTORS_H