Optimization: enable cache-efficient application of HouseholderSequence.

This commit is contained in:
Gael Guennebaud 2014-09-04 09:15:59 +02:00
parent 80993b95d3
commit 8846aa6d1b
4 changed files with 89 additions and 28 deletions

View File

@ -16,48 +16,85 @@
namespace Eigen { namespace Eigen {
namespace internal { namespace internal {
/** \internal */
// template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
// void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
// {
// typedef typename TriangularFactorType::Index Index;
// typedef typename VectorsType::Scalar Scalar;
// const Index nbVecs = vectors.cols();
// eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
//
// for(Index i = 0; i < nbVecs; i++)
// {
// Index rs = vectors.rows() - i;
// // Warning, note that hCoeffs may alias with vectors.
// // It is then necessary to copy it before modifying vectors(i,i).
// typename CoeffsType::Scalar h = hCoeffs(i);
// // This hack permits to pass trough nested Block<> and Transpose<> expressions.
// Scalar *Vii_ptr = const_cast<Scalar*>(vectors.data() + vectors.outerStride()*i + vectors.innerStride()*i);
// Scalar Vii = *Vii_ptr;
// *Vii_ptr = Scalar(1);
// triFactor.col(i).head(i).noalias() = -h * vectors.block(i, 0, rs, i).adjoint()
// * vectors.col(i).tail(rs);
// *Vii_ptr = Vii;
// // FIXME add .noalias() once the triangular product can work inplace
// triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
// * triFactor.col(i).head(i);
// triFactor(i,i) = hCoeffs(i);
// }
// }
/** \internal */ /** \internal */
// This variant avoid modifications in vectors
template<typename TriangularFactorType,typename VectorsType,typename CoeffsType> template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs) void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
{ {
typedef typename TriangularFactorType::Index Index; typedef typename TriangularFactorType::Index Index;
typedef typename VectorsType::Scalar Scalar;
const Index nbVecs = vectors.cols(); const Index nbVecs = vectors.cols();
eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs); eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
for(Index i = 0; i < nbVecs; i++) for(Index i = nbVecs-1; i >=0 ; --i)
{ {
Index rs = vectors.rows() - i; Index rs = vectors.rows() - i - 1;
Scalar Vii = vectors(i,i); Index rt = nbVecs-i-1;
vectors.const_cast_derived().coeffRef(i,i) = Scalar(1);
triFactor.col(i).head(i).noalias() = -hCoeffs(i) * vectors.block(i, 0, rs, i).adjoint() if(rt>0)
* vectors.col(i).tail(rs); {
vectors.const_cast_derived().coeffRef(i, i) = Vii; triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint()
// FIXME add .noalias() once the triangular product can work inplace * vectors.bottomRightCorner(rs, rt).template triangularView<UnitLower>();
triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
* triFactor.col(i).head(i); // FIXME add .noalias() once the triangular product can work inplace
triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * triFactor.bottomRightCorner(rt,rt).template triangularView<Upper>();
}
triFactor(i,i) = hCoeffs(i); triFactor(i,i) = hCoeffs(i);
} }
} }
/** \internal */ /** \internal
* if forward then perform mat = H0 * H1 * H2 * mat
* otherwise perform mat = H2 * H1 * H0 * mat
*/
template<typename MatrixType,typename VectorsType,typename CoeffsType> template<typename MatrixType,typename VectorsType,typename CoeffsType>
void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs) void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs, bool forward)
{ {
typedef typename MatrixType::Index Index; typedef typename MatrixType::Index Index;
enum { TFactorSize = MatrixType::ColsAtCompileTime }; enum { TFactorSize = MatrixType::ColsAtCompileTime };
Index nbVecs = vectors.cols(); Index nbVecs = vectors.cols();
Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, ColMajor> T(nbVecs,nbVecs); Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, RowMajor> T(nbVecs,nbVecs);
make_block_householder_triangular_factor(T, vectors, hCoeffs);
if(forward) make_block_householder_triangular_factor(T, vectors, hCoeffs);
const TriangularView<const VectorsType, UnitLower>& V(vectors); else make_block_householder_triangular_factor(T, vectors, hCoeffs.conjugate());
const TriangularView<const VectorsType, UnitLower> V(vectors);
// A -= V T V^* A // A -= V T V^* A
Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,0, Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,0,
VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat; VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat;
// FIXME add .noalias() once the triangular product can work inplace // FIXME add .noalias() once the triangular product can work inplace
tmp = T.template triangularView<Upper>().adjoint() * tmp; if(forward) tmp = T.template triangularView<Upper>() * tmp;
else tmp = T.template triangularView<Upper>().adjoint() * tmp;
mat.noalias() -= V * tmp; mat.noalias() -= V * tmp;
} }

View File

@ -319,12 +319,36 @@ template<typename VectorsType, typename CoeffsType, int Side> class HouseholderS
template<typename Dest, typename Workspace> template<typename Dest, typename Workspace>
inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace) const inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace) const
{ {
workspace.resize(dst.cols()); const Index BlockSize = 1;
for(Index k = 0; k < m_length; ++k) // if the entries are large enough, then apply the reflectors by block
if(m_length>BlockSize && dst.cols()>1)
{ {
Index actual_k = m_trans ? k : m_length-k-1; for(Index i = 0; i < m_length; i+=BlockSize)
dst.bottomRows(rows()-m_shift-actual_k) {
.applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data()); Index end = m_trans ? (std::min)(m_length,i+BlockSize) : m_length-i;
Index k = m_trans ? i : (std::max)(Index(0),end-BlockSize);
Index bs = end-k;
Index start = k + m_shift;
typedef Block<typename internal::remove_all<VectorsType>::type,Dynamic,Dynamic> SubVectorsType;
SubVectorsType sub_vecs1(m_vectors.const_cast_derived(), Side==OnTheRight ? k : start,
Side==OnTheRight ? start : k,
Side==OnTheRight ? bs : m_vectors.rows()-start,
Side==OnTheRight ? m_vectors.cols()-start : bs);
typename internal::conditional<Side==OnTheRight, Transpose<SubVectorsType>, SubVectorsType&>::type sub_vecs(sub_vecs1);
Block<Dest,Dynamic,Dynamic> sub_dst(dst,dst.rows()-rows()+m_shift+k,0, rows()-m_shift-k,dst.cols());
apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_trans);
}
}
else
{
workspace.resize(dst.cols());
for(Index k = 0; k < m_length; ++k)
{
Index actual_k = m_trans ? k : m_length-k-1;
dst.bottomRows(rows()-m_shift-actual_k)
.applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
}
} }
} }

View File

@ -299,8 +299,8 @@ struct householder_qr_inplace_blocked
for (k = 0; k < size; k += blockSize) for (k = 0; k < size; k += blockSize)
{ {
Index bs = (std::min)(size-k,blockSize); // actual size of the block Index bs = (std::min)(size-k,blockSize); // actual size of the block
Index tcols = cols - k - bs; // trailing columns Index tcols = cols - k - bs; // trailing columns
Index brows = rows-k; // rows of the block Index brows = rows-k; // rows of the block
// partition the matrix: // partition the matrix:
// A00 | A01 | A02 // A00 | A01 | A02
@ -318,7 +318,7 @@ struct householder_qr_inplace_blocked
if(tcols) if(tcols)
{ {
BlockType A21_22 = mat.block(k,k+bs,brows,tcols); BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint()); apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment, false); // false == backward
} }
} }
} }

View File

@ -215,10 +215,10 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint(); if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint();
} }
// 5 - construct right Householder transform in-placecols // 5 - construct right Householder transform in-place
u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]); u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]);
// this eases the application of Householder transforAions // this eases the application of Householder transformations
// A(k,k+1) will store tau_u later // A(k,k+1) will store tau_u later
A(k,k+1) = Scalar(1); A(k,k+1) = Scalar(1);