Update pshiftleft to pass the shift as a true compile-time integer.

2025-09-24 07:13:16 +08:00 · 2018-11-27 22:57:30 +01:00 · 2018-11-27 22:57:30 +01:00 · a1a5fbbd21
commit a1a5fbbd21
parent fa7fd61eda
4 changed files with 11 additions and 9 deletions
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -219,9 +219,11 @@ pxor(const Packet& a, const Packet& b) { return a ^ b; }
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pandnot(const Packet& a, const Packet& b) { return a & (!b); }

-/** \internal \returns \a a shifted by n bits */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pshiftleft(const Packet& a, int n); /* { return a << n; } */
+/** \internal \returns \a a shifted by N bits */
+template<int N> EIGEN_DEVICE_FUNC inline int
+pshiftleft(const int& a) { return a << N; }
+template<int N> EIGEN_DEVICE_FUNC inline long int
+pshiftleft(const long int& a) { return a << N; }

 /** \internal \returns the significant and exponent of the underlying floating point numbers
  * See https://en.cppreference.com/w/cpp/numeric/math/frexp
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -256,12 +256,12 @@ template<> EIGEN_STRONG_INLINE Packet8f pselect<Packet8f>(const Packet8f& mask,
 template<> EIGEN_STRONG_INLINE Packet4d pselect<Packet4d>(const Packet4d& mask, const Packet4d& a, const Packet4d& b)
 { return _mm256_blendv_pd(b,a,mask); }

-template<> EIGEN_STRONG_INLINE Packet8i pshiftleft<Packet8i>(const Packet8i& a, int n) {
+template<int N> EIGEN_STRONG_INLINE Packet8i pshiftleft(const Packet8i& a) {
 #ifdef EIGEN_VECTORIZE_AVX2
-  return _mm256_slli_epi32(a, n);
+  return _mm256_slli_epi32(a, N);
 #else
-  __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(a, 0), n);
-  __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(a, 1), n);
+  __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(a, 0), N);
+  __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(a, 1), N);
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
 #endif
 }
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -270,7 +270,7 @@ Packet psin_float(const Packet& _x)

  // Compute the sign to apply to the polynomial.
  // sign = third_bit(y_int1) xor signbit(_x)
-  Packet sign_bit = pxor(_x, preinterpret<Packet>(pshiftleft(y_int1, 29)));
+  Packet sign_bit = pxor(_x, preinterpret<Packet>(pshiftleft<29>(y_int1)));
  sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit

  // Get the polynomial selection mask from the second bit of y_int1
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -370,7 +370,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, con
 template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(b,a); }
 template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(b,a); }

-template<> EIGEN_STRONG_INLINE Packet4i pshiftleft<Packet4i>(const Packet4i& a, int n) { return _mm_slli_epi32(a,n); }
+template<int N> EIGEN_STRONG_INLINE Packet4i pshiftleft(const Packet4i& a) { return _mm_slli_epi32(a,N); }

 #ifdef EIGEN_VECTORIZE_SSE4_1
 template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }