mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 17:49:36 +08:00
Enable vectorization of transposeInPlace for PacketSize x PacketSize matrices
This commit is contained in:
parent
e1f1091fde
commit
c6eb84aabc
@ -217,18 +217,39 @@ MatrixBase<Derived>::adjoint() const
|
||||
namespace internal {
|
||||
|
||||
template<typename MatrixType,
|
||||
bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic>
|
||||
bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,
|
||||
bool MatchPacketSize =
|
||||
(int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))
|
||||
&& (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >
|
||||
struct inplace_transpose_selector;
|
||||
|
||||
template<typename MatrixType>
|
||||
struct inplace_transpose_selector<MatrixType,true> { // square matrix
|
||||
struct inplace_transpose_selector<MatrixType,true,false> { // square matrix
|
||||
static void run(MatrixType& m) {
|
||||
m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only.
|
||||
template<typename MatrixType>
|
||||
struct inplace_transpose_selector<MatrixType,false> { // non square matrix
|
||||
struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
|
||||
static void run(MatrixType& m) {
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
|
||||
typedef typename MatrixType::Index Index;
|
||||
const Index PacketSize = internal::packet_traits<Scalar>::size;
|
||||
const Index Alignment = internal::evaluator<MatrixType>::Flags&AlignedBit ? Aligned : Unaligned;
|
||||
PacketBlock<Packet> A;
|
||||
for (Index i=0; i<PacketSize; ++i)
|
||||
A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
|
||||
internal::ptranspose(A);
|
||||
for (Index i=0; i<PacketSize; ++i)
|
||||
m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType,bool MatchPacketSize>
|
||||
struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix
|
||||
static void run(MatrixType& m) {
|
||||
if (m.rows()==m.cols())
|
||||
m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
|
||||
|
@ -64,6 +64,7 @@ template<typename MatrixType> void adjoint(const MatrixType& m)
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
|
||||
const Index PacketSize = internal::packet_traits<Scalar>::size;
|
||||
|
||||
Index rows = m.rows();
|
||||
Index cols = m.cols();
|
||||
@ -109,6 +110,17 @@ template<typename MatrixType> void adjoint(const MatrixType& m)
|
||||
m3.transposeInPlace();
|
||||
VERIFY_IS_APPROX(m3,m1);
|
||||
|
||||
if(PacketSize<m3.rows() && PacketSize<m3.cols())
|
||||
{
|
||||
m3 = m1;
|
||||
Index i = internal::random<Index>(0,m3.rows()-PacketSize);
|
||||
Index j = internal::random<Index>(0,m3.cols()-PacketSize);
|
||||
m3.template block<PacketSize,PacketSize>(i,j).transposeInPlace();
|
||||
VERIFY_IS_APPROX( (m3.template block<PacketSize,PacketSize>(i,j)), (m1.template block<PacketSize,PacketSize>(i,j).transpose()) );
|
||||
m3.template block<PacketSize,PacketSize>(i,j).transposeInPlace();
|
||||
VERIFY_IS_APPROX(m3,m1);
|
||||
}
|
||||
|
||||
// check inplace adjoint
|
||||
m3 = m1;
|
||||
m3.adjointInPlace();
|
||||
@ -129,9 +141,19 @@ void test_adjoint()
|
||||
CALL_SUBTEST_1( adjoint(Matrix<float, 1, 1>()) );
|
||||
CALL_SUBTEST_2( adjoint(Matrix3d()) );
|
||||
CALL_SUBTEST_3( adjoint(Matrix4f()) );
|
||||
|
||||
CALL_SUBTEST_4( adjoint(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2), internal::random<int>(1,EIGEN_TEST_MAX_SIZE/2))) );
|
||||
CALL_SUBTEST_5( adjoint(MatrixXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
CALL_SUBTEST_6( adjoint(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||
|
||||
// Complement for 128 bits vectorization:
|
||||
CALL_SUBTEST_8( adjoint(Matrix2d()) );
|
||||
CALL_SUBTEST_9( adjoint(Matrix<int,4,4>()) );
|
||||
|
||||
// 256 bits vectorization:
|
||||
CALL_SUBTEST_10( adjoint(Matrix<float,8,8>()) );
|
||||
CALL_SUBTEST_11( adjoint(Matrix<double,4,4>()) );
|
||||
CALL_SUBTEST_12( adjoint(Matrix<int,8,8>()) );
|
||||
}
|
||||
// test a large static matrix only once
|
||||
CALL_SUBTEST_7( adjoint(Matrix<float, 100, 100>()) );
|
||||
|
Loading…
x
Reference in New Issue
Block a user