mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 02:33:59 +08:00
generalize our internal rank K update routine to support more general A*B product while evaluating only one triangular part and make it available via, e.g.:
R.triangularView<Lower>() += s * A * B;
This commit is contained in:
parent
c810d14d4d
commit
2577ef90c0
@ -308,6 +308,7 @@ using std::size_t;
|
||||
#include "src/Core/products/GeneralBlockPanelKernel.h"
|
||||
#include "src/Core/products/GeneralMatrixVector.h"
|
||||
#include "src/Core/products/GeneralMatrixMatrix.h"
|
||||
#include "src/Core/products/GeneralMatrixMatrixTriangular.h"
|
||||
#include "src/Core/products/SelfadjointMatrixVector.h"
|
||||
#include "src/Core/products/SelfadjointMatrixMatrix.h"
|
||||
#include "src/Core/products/SelfadjointProduct.h"
|
||||
|
@ -189,9 +189,9 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
|
||||
/** \sa MatrixBase::operator+=() */
|
||||
template<typename Other> TriangularView& operator+=(const Other& other) { return *this = m_matrix + other; }
|
||||
template<typename Other> TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
|
||||
/** \sa MatrixBase::operator-=() */
|
||||
template<typename Other> TriangularView& operator-=(const Other& other) { return *this = m_matrix - other; }
|
||||
template<typename Other> TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
|
||||
/** \sa MatrixBase::operator*=() */
|
||||
TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
|
||||
/** \sa MatrixBase::operator/=() */
|
||||
@ -292,7 +292,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
(lhs.derived(),rhs.m_matrix);
|
||||
}
|
||||
|
||||
|
||||
template<int Side, typename OtherDerived>
|
||||
typename internal::plain_matrix_type_column_major<OtherDerived>::type
|
||||
solve(const MatrixBase<OtherDerived>& other) const;
|
||||
@ -341,8 +340,51 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
else
|
||||
return m_matrix.diagonal().prod();
|
||||
}
|
||||
|
||||
|
||||
// TODO simplify the following:
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{
|
||||
setZero();
|
||||
return assignProduct(other,1);
|
||||
}
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{
|
||||
return assignProduct(other,1);
|
||||
}
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
|
||||
{
|
||||
return assignProduct(other,-1);
|
||||
}
|
||||
|
||||
|
||||
template<typename ProductDerived>
|
||||
EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
|
||||
{
|
||||
setZero();
|
||||
return assignProduct(other,other.alpha());
|
||||
}
|
||||
|
||||
template<typename ProductDerived>
|
||||
EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
|
||||
{
|
||||
return assignProduct(other,other.alpha());
|
||||
}
|
||||
|
||||
template<typename ProductDerived>
|
||||
EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
|
||||
{
|
||||
return assignProduct(other,-other.alpha());
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
|
||||
|
||||
const MatrixTypeNested m_matrix;
|
||||
};
|
||||
|
225
Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
Normal file
225
Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
Normal file
@ -0,0 +1,225 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
|
||||
#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
/**********************************************************************
|
||||
* This file implements a general A * B product while
|
||||
* evaluating only one triangular part of the product.
|
||||
* This is more general version of self adjoint product (C += A A^T)
|
||||
* as the level 3 SYRK Blas routine.
|
||||
**********************************************************************/
|
||||
|
||||
// forward declarations (defined at the end of this file)
|
||||
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
|
||||
struct tribb_kernel;
|
||||
|
||||
/* Optimized matrix-matrix product evaluating only one triangular half */
|
||||
template <typename Index,
|
||||
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
|
||||
int ResStorageOrder, int UpLo>
|
||||
struct general_matrix_matrix_triangular_product;
|
||||
|
||||
// as usual if the result is row major => we transpose the product
|
||||
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
|
||||
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo>
|
||||
{
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
|
||||
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
|
||||
{
|
||||
general_matrix_matrix_triangular_product<Index,
|
||||
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
|
||||
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
|
||||
ColMajor, UpLo==Lower?Upper:Lower>
|
||||
::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
|
||||
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo>
|
||||
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo>
|
||||
{
|
||||
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
|
||||
const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
|
||||
{
|
||||
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
|
||||
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
|
||||
|
||||
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
|
||||
|
||||
Index kc = depth; // cache block size along the K direction
|
||||
Index mc = size; // cache block size along the M direction
|
||||
Index nc = size; // cache block size along the N direction
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc);
|
||||
// !!! mc must be a multiple of nr:
|
||||
if(mc > Traits::nr)
|
||||
mc = (mc/Traits::nr)*Traits::nr;
|
||||
|
||||
LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc);
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*size;
|
||||
RhsScalar* allocatedBlockB = ei_aligned_stack_new(RhsScalar, sizeB);
|
||||
RhsScalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
|
||||
gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
|
||||
gebp_kernel <LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
|
||||
tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb;
|
||||
|
||||
for(Index k2=0; k2<depth; k2+=kc)
|
||||
{
|
||||
const Index actual_kc = std::min(k2+kc,depth)-k2;
|
||||
|
||||
// note that the actual rhs is the transpose/adjoint of mat
|
||||
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size);
|
||||
|
||||
for(Index i2=0; i2<size; i2+=mc)
|
||||
{
|
||||
const Index actual_mc = std::min(i2+mc,size)-i2;
|
||||
|
||||
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
|
||||
|
||||
// the selected actual_mc * size panel of res is split into three different part:
|
||||
// 1 - before the diagonal => processed with gebp or skipped
|
||||
// 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
|
||||
// 3 - after the diagonal => processed with gebp or skipped
|
||||
if (UpLo==Lower)
|
||||
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha,
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
|
||||
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
|
||||
|
||||
if (UpLo==Upper)
|
||||
{
|
||||
Index j2 = i2+actual_mc;
|
||||
gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha,
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
}
|
||||
}
|
||||
}
|
||||
ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
|
||||
ei_aligned_stack_delete(RhsScalar, allocatedBlockB, sizeB);
|
||||
}
|
||||
};
|
||||
|
||||
// Optimized packed Block * packed Block product kernel evaluating only one given triangular part
|
||||
// This kernel is built on top of the gebp kernel:
|
||||
// - the current destination block is processed per panel of actual_mc x BlockSize
|
||||
// where BlockSize is set to the minimal value allowing gebp to be as fast as possible
|
||||
// - then, as usual, each panel is split into three parts along the diagonal,
|
||||
// the sub blocks above and below the diagonal are processed as usual,
|
||||
// while the triangular block overlapping the diagonal is evaluated into a
|
||||
// small temporary buffer which is then accumulated into the result using a
|
||||
// triangular traversal.
|
||||
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
|
||||
struct tribb_kernel
|
||||
{
|
||||
typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits;
|
||||
typedef typename Traits::ResScalar ResScalar;
|
||||
|
||||
enum {
|
||||
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
|
||||
};
|
||||
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace)
|
||||
{
|
||||
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
|
||||
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
|
||||
|
||||
// let's process the block per panel of actual_mc x BlockSize,
|
||||
// again, each is split into three parts, etc.
|
||||
for (Index j=0; j<size; j+=BlockSize)
|
||||
{
|
||||
Index actualBlockSize = std::min<Index>(BlockSize,size - j);
|
||||
const RhsScalar* actual_b = blockB+j*depth;
|
||||
|
||||
if(UpLo==Upper)
|
||||
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
|
||||
// selfadjoint micro block
|
||||
{
|
||||
Index i = j;
|
||||
buffer.setZero();
|
||||
// 1 - apply the kernel on the temporary buffer
|
||||
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
// 2 - triangular accumulation
|
||||
for(Index j1=0; j1<actualBlockSize; ++j1)
|
||||
{
|
||||
ResScalar* r = res + (j+j1)*resStride + i;
|
||||
for(Index i1=UpLo==Lower ? j1 : 0;
|
||||
UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1)
|
||||
r[i1] += buffer(i1,j1);
|
||||
}
|
||||
}
|
||||
|
||||
if(UpLo==Lower)
|
||||
{
|
||||
Index i = j+actualBlockSize;
|
||||
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
// high level API
|
||||
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
template<typename ProductDerived, typename Lhs, typename Rhs>
|
||||
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha)
|
||||
{
|
||||
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
||||
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
|
||||
typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
|
||||
const ActualLhs actualLhs = LhsBlasTraits::extract(prod.lhs());
|
||||
|
||||
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
||||
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
|
||||
typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
|
||||
const ActualRhs actualRhs = RhsBlasTraits::extract(prod.rhs());
|
||||
|
||||
typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
|
||||
|
||||
internal::general_matrix_matrix_triangular_product<Index,
|
||||
typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
|
||||
typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
|
||||
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
|
||||
::run(m_matrix.cols(), actualLhs.cols(),
|
||||
&actualLhs.coeff(0,0), actualLhs.outerStride(), &actualRhs.coeff(0,0), actualRhs.outerStride(),
|
||||
const_cast<Scalar*>(m_matrix.data()), m_matrix.outerStride(), actualAlpha);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
|
@ -25,175 +25,12 @@
|
||||
#ifndef EIGEN_SELFADJOINT_PRODUCT_H
|
||||
#define EIGEN_SELFADJOINT_PRODUCT_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
/**********************************************************************
|
||||
* This file implements a self adjoint product: C += A A^T updating only
|
||||
* half of the selfadjoint matrix C.
|
||||
* It corresponds to the level 3 SYRK Blas routine.
|
||||
**********************************************************************/
|
||||
|
||||
// forward declarations (defined at the end of this file)
|
||||
template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
|
||||
struct sybb_kernel;
|
||||
|
||||
/* Optimized selfadjoint product (_SYRK) */
|
||||
template <typename Scalar, typename Index,
|
||||
int RhsStorageOrder,
|
||||
int ResStorageOrder, bool AAT, int UpLo>
|
||||
struct selfadjoint_product;
|
||||
|
||||
// as usual if the result is row major => we transpose the product
|
||||
template <typename Scalar, typename Index, int MatStorageOrder, bool AAT, int UpLo>
|
||||
struct selfadjoint_product<Scalar, Index, MatStorageOrder, RowMajor, AAT, UpLo>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Index size, Index depth, const Scalar* mat, Index matStride, Scalar* res, Index resStride, Scalar alpha)
|
||||
{
|
||||
selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, !AAT, UpLo==Lower?Upper:Lower>
|
||||
::run(size, depth, mat, matStride, res, resStride, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Scalar, typename Index,
|
||||
int MatStorageOrder, bool AAT, int UpLo>
|
||||
struct selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpLo>
|
||||
{
|
||||
|
||||
static EIGEN_DONT_INLINE void run(
|
||||
Index size, Index depth,
|
||||
const Scalar* _mat, Index matStride,
|
||||
Scalar* res, Index resStride,
|
||||
Scalar alpha)
|
||||
{
|
||||
const_blas_data_mapper<Scalar, Index, MatStorageOrder> mat(_mat,matStride);
|
||||
|
||||
// if(AAT)
|
||||
// alpha = conj(alpha);
|
||||
|
||||
typedef gebp_traits<Scalar,Scalar> Traits;
|
||||
|
||||
Index kc = depth; // cache block size along the K direction
|
||||
Index mc = size; // cache block size along the M direction
|
||||
Index nc = size; // cache block size along the N direction
|
||||
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
||||
// !!! mc must be a multiple of nr:
|
||||
if(mc>Traits::nr)
|
||||
mc = (mc/Traits::nr)*Traits::nr;
|
||||
|
||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
|
||||
std::size_t sizeB = sizeW + kc*size;
|
||||
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
||||
Scalar* blockB = allocatedBlockB + sizeW;
|
||||
|
||||
// note that the actual rhs is the transpose/adjoint of mat
|
||||
enum {
|
||||
ConjLhs = NumTraits<Scalar>::IsComplex && !AAT,
|
||||
ConjRhs = NumTraits<Scalar>::IsComplex && AAT
|
||||
};
|
||||
|
||||
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs> gebp_kernel;
|
||||
gemm_pack_rhs<Scalar, Index, Traits::nr,MatStorageOrder==RowMajor ? ColMajor : RowMajor> pack_rhs;
|
||||
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, MatStorageOrder, false> pack_lhs;
|
||||
sybb_kernel<Scalar, Index, Traits::mr, Traits::nr, ConjLhs, ConjRhs, UpLo> sybb;
|
||||
|
||||
for(Index k2=0; k2<depth; k2+=kc)
|
||||
{
|
||||
const Index actual_kc = std::min(k2+kc,depth)-k2;
|
||||
|
||||
// note that the actual rhs is the transpose/adjoint of mat
|
||||
pack_rhs(blockB, &mat(0,k2), matStride, actual_kc, size);
|
||||
|
||||
for(Index i2=0; i2<size; i2+=mc)
|
||||
{
|
||||
const Index actual_mc = std::min(i2+mc,size)-i2;
|
||||
|
||||
pack_lhs(blockA, &mat(i2, k2), matStride, actual_kc, actual_mc);
|
||||
|
||||
// the selected actual_mc * size panel of res is split into three different part:
|
||||
// 1 - before the diagonal => processed with gebp or skipped
|
||||
// 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
|
||||
// 3 - after the diagonal => processed with gebp or skipped
|
||||
if (UpLo==Lower)
|
||||
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, std::min(size,i2), alpha,
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
|
||||
sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
|
||||
|
||||
if (UpLo==Upper)
|
||||
{
|
||||
Index j2 = i2+actual_mc;
|
||||
gebp_kernel(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, std::max(Index(0), size-j2), alpha,
|
||||
-1, -1, 0, 0, allocatedBlockB);
|
||||
}
|
||||
}
|
||||
}
|
||||
ei_aligned_stack_delete(Scalar, blockA, kc*mc);
|
||||
ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
|
||||
}
|
||||
};
|
||||
|
||||
// Optimized SYmmetric packed Block * packed Block product kernel.
|
||||
// This kernel is built on top of the gebp kernel:
|
||||
// - the current selfadjoint block (res) is processed per panel of actual_mc x BlockSize
|
||||
// where BlockSize is set to the minimal value allowing gebp to be as fast as possible
|
||||
// - then, as usual, each panel is split into three parts along the diagonal,
|
||||
// the sub blocks above and below the diagonal are processed as usual,
|
||||
// while the selfadjoint block overlapping the diagonal is evaluated into a
|
||||
// small temporary buffer which is then accumulated into the result using a
|
||||
// triangular traversal.
|
||||
template<typename Scalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
|
||||
struct sybb_kernel
|
||||
{
|
||||
enum {
|
||||
PacketSize = packet_traits<Scalar>::size,
|
||||
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
|
||||
};
|
||||
void operator()(Scalar* res, Index resStride, const Scalar* blockA, const Scalar* blockB, Index size, Index depth, Scalar alpha, Scalar* workspace)
|
||||
{
|
||||
gebp_kernel<Scalar, Scalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
|
||||
Matrix<Scalar,BlockSize,BlockSize,ColMajor> buffer;
|
||||
|
||||
// let's process the block per panel of actual_mc x BlockSize,
|
||||
// again, each is split into three parts, etc.
|
||||
for (Index j=0; j<size; j+=BlockSize)
|
||||
{
|
||||
Index actualBlockSize = std::min<Index>(BlockSize,size - j);
|
||||
const Scalar* actual_b = blockB+j*depth;
|
||||
|
||||
if(UpLo==Upper)
|
||||
gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
|
||||
// selfadjoint micro block
|
||||
{
|
||||
Index i = j;
|
||||
buffer.setZero();
|
||||
// 1 - apply the kernel on the temporary buffer
|
||||
gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
// 2 - triangular accumulation
|
||||
for(Index j1=0; j1<actualBlockSize; ++j1)
|
||||
{
|
||||
Scalar* r = res + (j+j1)*resStride + i;
|
||||
for(Index i1=UpLo==Lower ? j1 : 0;
|
||||
UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1)
|
||||
r[i1] += buffer(i1,j1);
|
||||
}
|
||||
}
|
||||
|
||||
if(UpLo==Lower)
|
||||
{
|
||||
Index i = j+actualBlockSize;
|
||||
gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha,
|
||||
-1, -1, 0, 0, workspace);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
// high level API
|
||||
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
@ -209,12 +46,13 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
|
||||
Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived());
|
||||
|
||||
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
|
||||
|
||||
internal::selfadjoint_product<Scalar, Index,
|
||||
_ActualUType::Flags&RowMajorBit ? RowMajor : ColMajor,
|
||||
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor,
|
||||
!UBlasTraits::NeedToConjugate, UpLo>
|
||||
::run(_expression().cols(), actualU.cols(), &actualU.coeff(0,0), actualU.outerStride(),
|
||||
|
||||
internal::general_matrix_matrix_triangular_product<Index,
|
||||
Scalar, _ActualUType::Flags&RowMajorBit ? RowMajor : ColMajor, UBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
|
||||
Scalar, _ActualUType::Flags&RowMajorBit ? ColMajor : RowMajor, (!UBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
|
||||
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
|
||||
::run(_expression().cols(), actualU.cols(),
|
||||
&actualU.coeff(0,0), actualU.outerStride(), &actualU.coeff(0,0), actualU.outerStride(),
|
||||
const_cast<Scalar*>(_expression().data()), _expression().outerStride(), actualAlpha);
|
||||
|
||||
return *this;
|
||||
|
@ -73,6 +73,7 @@ ei_add_test(product_syrk)
|
||||
ei_add_test(product_trmv)
|
||||
ei_add_test(product_trmm)
|
||||
ei_add_test(product_trsolve)
|
||||
ei_add_test(product_mmtr)
|
||||
ei_add_test(product_notemporary)
|
||||
ei_add_test(stable_norm)
|
||||
ei_add_test(bandmatrix)
|
||||
|
80
test/product_mmtr.cpp
Normal file
80
test/product_mmtr.cpp
Normal file
@ -0,0 +1,80 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#include "main.h"
|
||||
|
||||
#define CHECK_MMTR(DEST, TRI, OP) { \
|
||||
ref2 = ref1 = DEST; \
|
||||
DEST.template triangularView<TRI>() OP; \
|
||||
ref1 OP; \
|
||||
ref2.template triangularView<TRI>() = ref1; \
|
||||
VERIFY_IS_APPROX(DEST,ref2); \
|
||||
}
|
||||
|
||||
template<typename Scalar> void mmtr(int size)
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> MatrixColMaj;
|
||||
typedef Matrix<Scalar,Dynamic,Dynamic,RowMajor> MatrixRowMaj;
|
||||
|
||||
DenseIndex othersize = internal::random<DenseIndex>(1,200);
|
||||
|
||||
MatrixColMaj matc(size, size);
|
||||
MatrixRowMaj matr(size, size);
|
||||
MatrixColMaj ref1(size, size), ref2(size, size);
|
||||
|
||||
MatrixColMaj soc(size,othersize); soc.setRandom();
|
||||
MatrixColMaj osc(othersize,size); osc.setRandom();
|
||||
MatrixRowMaj sor(size,othersize); sor.setRandom();
|
||||
MatrixRowMaj osr(othersize,size); osr.setRandom();
|
||||
|
||||
Scalar s = internal::random<Scalar>();
|
||||
|
||||
CHECK_MMTR(matc, Lower, = s*soc*sor.adjoint());
|
||||
CHECK_MMTR(matc, Upper, = s*(soc*soc.adjoint()));
|
||||
CHECK_MMTR(matr, Lower, = s*soc*soc.adjoint());
|
||||
CHECK_MMTR(matr, Upper, = soc*(s*sor.adjoint()));
|
||||
|
||||
CHECK_MMTR(matc, Lower, += s*soc*soc.adjoint());
|
||||
CHECK_MMTR(matc, Upper, += s*(soc*sor.transpose()));
|
||||
CHECK_MMTR(matr, Lower, += s*sor*soc.adjoint());
|
||||
CHECK_MMTR(matr, Upper, += soc*(s*soc.adjoint()));
|
||||
|
||||
CHECK_MMTR(matc, Lower, -= s*soc*soc.adjoint());
|
||||
CHECK_MMTR(matc, Upper, -= s*(osc.transpose()*osc.conjugate()));
|
||||
CHECK_MMTR(matr, Lower, -= s*soc*soc.adjoint());
|
||||
CHECK_MMTR(matr, Upper, -= soc*(s*soc.adjoint()));
|
||||
}
|
||||
|
||||
void test_product_mmtr()
|
||||
{
|
||||
for(int i = 0; i < g_repeat ; i++)
|
||||
{
|
||||
CALL_SUBTEST_1((mmtr<float>(internal::random<int>(1,320))));
|
||||
CALL_SUBTEST_2((mmtr<double>(internal::random<int>(1,320))));
|
||||
CALL_SUBTEST_3((mmtr<std::complex<float> >(internal::random<int>(1,200))));
|
||||
CALL_SUBTEST_4((mmtr<std::complex<double> >(internal::random<int>(1,200))));
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user