fix vectorization logic and code of cross3 which was never enabled..

2025-10-11 23:51:50 +08:00 · 2010-09-08 14:10:01 +02:00 · 2010-09-08 14:10:01 +02:00 · 91e9344be9
commit 91e9344be9
parent f9123df772
3 changed files with 5 additions and 4 deletions
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@ -54,7 +54,7 @@ MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const

 template< int Arch,typename VectorLhs,typename VectorRhs,
          typename Scalar = typename VectorLhs::Scalar,
-          int Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
+          bool Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
 struct ei_cross3_impl {
  inline static typename ei_plain_matrix_type<VectorLhs>::type
  run(const VectorLhs& lhs, const VectorRhs& rhs)
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@ -399,7 +399,8 @@ QuaternionBase<Derived>::_transformVector(Vector3 v) const
    // It appears to be much faster than the common algorithm found
    // in the litterature (30 versus 39 flops). It also requires two
    // Vector3 as temporaries.
-    Vector3 uv = Scalar(2) * this->vec().cross(v);
+    Vector3 uv = this->vec().cross(v);
+    uv += uv;
    return v + this->w() * uv + this->vec().cross(uv);
 }

--- a/Eigen/src/Geometry/arch/Geometry_SSE.h
+++ b/Eigen/src/Geometry/arch/Geometry_SSE.h
@ -54,8 +54,8 @@ struct ei_cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
  inline static typename ei_plain_matrix_type<VectorLhs>::type
  run(const VectorLhs& lhs, const VectorRhs& rhs)
  {
-    __m128 a = lhs.coeffs().packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
-    __m128 b = rhs.coeffs().packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
+    __m128 a = lhs.template packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
+    __m128 b = rhs.template packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
    __m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3));
    __m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3));
    typename ei_plain_matrix_type<VectorLhs>::type res;