mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 19:29:02 +08:00
Fix vectorization logic and code of cross3, which was never enabled.
This commit is contained in:
parent
f9123df772
commit
91e9344be9
@ -54,7 +54,7 @@ MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
|
|||||||
|
|
||||||
template< int Arch,typename VectorLhs,typename VectorRhs,
|
template< int Arch,typename VectorLhs,typename VectorRhs,
|
||||||
typename Scalar = typename VectorLhs::Scalar,
|
typename Scalar = typename VectorLhs::Scalar,
|
||||||
int Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
|
bool Vectorizable = (VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit>
|
||||||
struct ei_cross3_impl {
|
struct ei_cross3_impl {
|
||||||
inline static typename ei_plain_matrix_type<VectorLhs>::type
|
inline static typename ei_plain_matrix_type<VectorLhs>::type
|
||||||
run(const VectorLhs& lhs, const VectorRhs& rhs)
|
run(const VectorLhs& lhs, const VectorRhs& rhs)
|
||||||
|
@ -399,7 +399,8 @@ QuaternionBase<Derived>::_transformVector(Vector3 v) const
|
|||||||
// It appears to be much faster than the common algorithm found
|
// It appears to be much faster than the common algorithm found
|
||||||
// in the literature (30 versus 39 flops). It also requires two
|
// in the literature (30 versus 39 flops). It also requires two
|
||||||
// Vector3 as temporaries.
|
// Vector3 as temporaries.
|
||||||
Vector3 uv = Scalar(2) * this->vec().cross(v);
|
Vector3 uv = this->vec().cross(v);
|
||||||
|
uv += uv;
|
||||||
return v + this->w() * uv + this->vec().cross(uv);
|
return v + this->w() * uv + this->vec().cross(uv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,8 +54,8 @@ struct ei_cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
|
|||||||
inline static typename ei_plain_matrix_type<VectorLhs>::type
|
inline static typename ei_plain_matrix_type<VectorLhs>::type
|
||||||
run(const VectorLhs& lhs, const VectorRhs& rhs)
|
run(const VectorLhs& lhs, const VectorRhs& rhs)
|
||||||
{
|
{
|
||||||
__m128 a = lhs.coeffs().packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
|
__m128 a = lhs.template packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
|
||||||
__m128 b = rhs.coeffs().packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
|
__m128 b = rhs.template packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0);
|
||||||
__m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3));
|
__m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3));
|
||||||
__m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3));
|
__m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3));
|
||||||
typename ei_plain_matrix_type<VectorLhs>::type res;
|
typename ei_plain_matrix_type<VectorLhs>::type res;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user