mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Fix 4x4 inverse via SSE for submatrices
This commit is contained in:
parent
db183ca7b3
commit
26d2cdefd4
@ -39,9 +39,11 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
|
|||||||
ResultAlignment = bool(ResultType::Flags&AlignedBit),
|
ResultAlignment = bool(ResultType::Flags&AlignedBit),
|
||||||
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
|
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
|
||||||
};
|
};
|
||||||
|
typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
|
||||||
|
|
||||||
static void run(const MatrixType& matrix, ResultType& result)
|
static void run(const MatrixType& mat, ResultType& result)
|
||||||
{
|
{
|
||||||
|
ActualMatrixType matrix(mat);
|
||||||
EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 };
|
EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 };
|
||||||
|
|
||||||
// Load the full matrix into registers
|
// Load the full matrix into registers
|
||||||
@ -167,14 +169,17 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
|
|||||||
ResultAlignment = bool(ResultType::Flags&AlignedBit),
|
ResultAlignment = bool(ResultType::Flags&AlignedBit),
|
||||||
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
|
StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
|
||||||
};
|
};
|
||||||
static void run(const MatrixType& matrix, ResultType& result)
|
typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
|
||||||
|
|
||||||
|
static void run(const MatrixType& mat, ResultType& result)
|
||||||
{
|
{
|
||||||
|
ActualMatrixType matrix(mat);
|
||||||
const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
|
const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
|
||||||
const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||||
|
|
||||||
// The inverse is calculated using "Divide and Conquer" technique. The
|
// The inverse is calculated using "Divide and Conquer" technique. The
|
||||||
// original matrix is divide into four 2x2 sub-matrices. Since each
|
// original matrix is divide into four 2x2 sub-matrices. Since each
|
||||||
// register of the matrix holds two element, the smaller matrices are
|
// register of the matrix holds two elements, the smaller matrices are
|
||||||
// consisted of two registers. Hence we get a better locality of the
|
// consisted of two registers. Hence we get a better locality of the
|
||||||
// calculations.
|
// calculations.
|
||||||
|
|
||||||
|
@ -220,11 +220,9 @@ ei_add_test(geo_quaternion)
|
|||||||
ei_add_test(geo_eulerangles)
|
ei_add_test(geo_eulerangles)
|
||||||
ei_add_test(geo_parametrizedline)
|
ei_add_test(geo_parametrizedline)
|
||||||
ei_add_test(geo_alignedbox)
|
ei_add_test(geo_alignedbox)
|
||||||
if(NOT EIGEN_TEST_EVALUATORS)
|
ei_add_test(geo_hyperplane)
|
||||||
ei_add_test(geo_hyperplane)
|
ei_add_test(geo_transformations)
|
||||||
ei_add_test(geo_transformations)
|
ei_add_test(geo_homogeneous)
|
||||||
ei_add_test(geo_homogeneous)
|
|
||||||
endif(NOT EIGEN_TEST_EVALUATORS)
|
|
||||||
ei_add_test(stdvector)
|
ei_add_test(stdvector)
|
||||||
ei_add_test(stdvector_overload)
|
ei_add_test(stdvector_overload)
|
||||||
ei_add_test(stdlist)
|
ei_add_test(stdlist)
|
||||||
|
@ -68,6 +68,14 @@ template<typename MatrixType> void inverse(const MatrixType& m)
|
|||||||
VERIFY_IS_MUCH_SMALLER_THAN(abs(det-m3.determinant()), RealScalar(1));
|
VERIFY_IS_MUCH_SMALLER_THAN(abs(det-m3.determinant()), RealScalar(1));
|
||||||
m3.computeInverseWithCheck(m4, invertible);
|
m3.computeInverseWithCheck(m4, invertible);
|
||||||
VERIFY( rows==1 ? invertible : !invertible );
|
VERIFY( rows==1 ? invertible : !invertible );
|
||||||
|
|
||||||
|
// check with submatrices
|
||||||
|
{
|
||||||
|
Matrix<Scalar, MatrixType::RowsAtCompileTime+1, MatrixType::RowsAtCompileTime+1, MatrixType::Options> m3;
|
||||||
|
m3.setRandom();
|
||||||
|
m2 = m3.template topLeftCorner<MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime>().inverse();
|
||||||
|
VERIFY_IS_APPROX( (m3.template topLeftCorner<MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime>()), m2.inverse() );
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// check in-place inversion
|
// check in-place inversion
|
||||||
|
Loading…
x
Reference in New Issue
Block a user