From 91e9344be9e408751a750f08067d1798a6c2c7fd Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Wed, 8 Sep 2010 14:10:01 +0200
Subject: [PATCH] fix vectorization logic and code of cross3 which was never enabled..

---
 Eigen/src/Geometry/OrthoMethods.h      | 2 +-
 Eigen/src/Geometry/Quaternion.h        | 3 ++-
 Eigen/src/Geometry/arch/Geometry_SSE.h | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index cb8d3458a..d03d85beb 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -54,7 +54,7 @@ MatrixBase::cross(const MatrixBase& other) const
 template< int Arch,typename VectorLhs,typename VectorRhs,
           typename Scalar = typename VectorLhs::Scalar,
-          int Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
+          bool Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
 struct ei_cross3_impl {
   inline static typename ei_plain_matrix_type::type
   run(const VectorLhs& lhs, const VectorRhs& rhs)
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 583d6c6aa..5e52d5b5a 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -399,7 +399,8 @@ QuaternionBase::_transformVector(Vector3 v) const
     // It appears to be much faster than the common algorithm found
     // in the litterature (30 versus 39 flops). It also requires two
     // Vector3 as temporaries.
-    Vector3 uv = Scalar(2) * this->vec().cross(v);
+    Vector3 uv = this->vec().cross(v);
+    uv += uv;
     return v + this->w() * uv + this->vec().cross(uv);
 }
diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h
index 798d81c91..7d82be694 100644
--- a/Eigen/src/Geometry/arch/Geometry_SSE.h
+++ b/Eigen/src/Geometry/arch/Geometry_SSE.h
@@ -54,8 +54,8 @@ struct ei_cross3_impl
   inline static typename ei_plain_matrix_type::type
   run(const VectorLhs& lhs, const VectorRhs& rhs)
   {
-    __m128 a = lhs.coeffs().packet(0);
-    __m128 b = rhs.coeffs().packet(0);
+    __m128 a = lhs.template packet(0);
+    __m128 b = rhs.template packet(0);
     __m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3));
     __m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3));
     typename ei_plain_matrix_type::type res;