From 91e9344be9e408751a750f08067d1798a6c2c7fd Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Wed, 8 Sep 2010 14:10:01 +0200
Subject: [PATCH] fix vectorization logic and code of cross3, which was never
 enabled

---
 Eigen/src/Geometry/OrthoMethods.h      | 2 +-
 Eigen/src/Geometry/Quaternion.h        | 3 ++-
 Eigen/src/Geometry/arch/Geometry_SSE.h | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index cb8d3458a..d03d85beb 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -54,7 +54,7 @@ MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
 
 template< int Arch,typename VectorLhs,typename VectorRhs,
           typename Scalar = typename VectorLhs::Scalar,
-          int Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
+          bool Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
 struct ei_cross3_impl {
   inline static typename ei_plain_matrix_type<VectorLhs>::type
   run(const VectorLhs& lhs, const VectorRhs& rhs)
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 583d6c6aa..5e52d5b5a 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -399,7 +399,8 @@ QuaternionBase<Derived>::_transformVector(Vector3 v) const
     // It appears to be much faster than the common algorithm found
     // in the litterature (30 versus 39 flops). It also requires two
     // Vector3 as temporaries.
-    Vector3 uv = Scalar(2) * this->vec().cross(v);
+    Vector3 uv = this->vec().cross(v);
+    uv += uv;
     return v + this->w() * uv + this->vec().cross(uv);
 }
 
diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h
index 798d81c91..7d82be694 100644
--- a/Eigen/src/Geometry/arch/Geometry_SSE.h
+++ b/Eigen/src/Geometry/arch/Geometry_SSE.h
@@ -54,8 +54,8 @@ struct ei_cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
   inline static typename ei_plain_matrix_type<VectorLhs>::type
   run(const VectorLhs& lhs, const VectorRhs& rhs)
   {
-    __m128 a = lhs.coeffs().packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
-    __m128 b = rhs.coeffs().packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
+    __m128 a = lhs.template packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
+    __m128 b = rhs.template packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
     __m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3));
     __m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3));
     typename ei_plain_matrix_type<VectorLhs>::type res;