Fix vectorization logic and code of cross3, which was never enabled.

This commit is contained in:
Gael Guennebaud 2010-09-08 14:10:01 +02:00
parent f9123df772
commit 91e9344be9
3 changed files with 5 additions and 4 deletions

View File

@ -54,7 +54,7 @@ MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
template< int Arch,typename VectorLhs,typename VectorRhs,
typename Scalar = typename VectorLhs::Scalar,
int Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
bool Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
struct ei_cross3_impl {
inline static typename ei_plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)

View File

@ -399,7 +399,8 @@ QuaternionBase<Derived>::_transformVector(Vector3 v) const
// It appears to be much faster than the common algorithm found
// in the literature (30 versus 39 flops). It also requires two
// Vector3 as temporaries.
Vector3 uv = Scalar(2) * this->vec().cross(v);
Vector3 uv = this->vec().cross(v);
uv += uv;
return v + this->w() * uv + this->vec().cross(uv);
}

View File

@ -54,8 +54,8 @@ struct ei_cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
inline static typename ei_plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)
{
__m128 a = lhs.coeffs().packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 b = rhs.coeffs().packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 a = lhs.template packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 b = rhs.template packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
__m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3));
__m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3));
typename ei_plain_matrix_type<VectorLhs>::type res;