From ba79e39c5c641888f58bff01febe9b81c4a22cbc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 18 Jun 2013 17:44:25 +0200 Subject: [PATCH] bug #71: enable vectorization of diagonal products in more cases. --- Eigen/src/Core/DiagonalProduct.h | 37 +++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h index d55b2c250..c03a0c2e1 100644 --- a/Eigen/src/Core/DiagonalProduct.h +++ b/Eigen/src/Core/DiagonalProduct.h @@ -26,14 +26,15 @@ struct traits > MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor, - _PacketOnDiag = !((int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft) - ||(int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), _SameTypes = is_same::value, // FIXME currently we need same types, but in the future the next rule should be the one - //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::Flags)&PacketAccessBit))), - _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && ((!_PacketOnDiag) || (bool(int(DiagonalType::Flags)&PacketAccessBit))), + //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), + _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0, - Flags = (HereditaryBits & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0), + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), CoeffReadCost = NumTraits::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost }; }; @@ -54,13 +55,21 @@ class DiagonalProduct : internal::no_assignment_operator, eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols())); } - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } + EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); } - const Scalar coeff(Index row, Index col) const + EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col); } + + EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const + { + enum { + StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor + }; + return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } template EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const @@ -69,11 +78,19 @@ class DiagonalProduct : internal::no_assignment_operator, StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor }; const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col; - return packet_impl(row,col,indexInDiagonalVector,typename internal::conditional< ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft) ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type()); } + + template + EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const + { + enum { + StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor + }; + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } protected: template @@ -88,7 +105,7 @@ class DiagonalProduct : internal::no_assignment_operator, { enum { InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalVectorPacketLoadMode = (LoadMode == Aligned && ((InnerSize%16) == 0)) ? Aligned : Unaligned + DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) }; return internal::pmul(m_matrix.template packet(row, col), m_diagonal.diagonal().template packet(id));