mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 11:19:02 +08:00
Implement functors for rank-1 and rank-2 update.
This commit is contained in:
parent
145f89cd5f
commit
b0b9b4d6b2
30
blas/GeneralRank1Update.h
Normal file
30
blas/GeneralRank1Update.h
Normal file
@ -0,0 +1,30 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Chen-Pang He <jdh8@ms63.hinet.net>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_GENERAL_RANK1UPDATE_H
|
||||
#define EIGEN_GENERAL_RANK1UPDATE_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
/* Optimized matrix += alpha * uv' */
|
||||
template<typename Scalar, typename Index, bool ConjRhs>
|
||||
struct general_rank1_update
|
||||
{
|
||||
static void run(Index rows, Index cols, Scalar* mat, Index stride, const Scalar* u, const Scalar* v, Scalar alpha)
|
||||
{
|
||||
typedef Matrix<Scalar,Dynamic,1> PlainVector;
|
||||
internal::conj_if<ConjRhs> cj;
|
||||
for (Index i=0; i<cols; ++i)
|
||||
Map<PlainVector>(mat+stride*i,rows) += alpha * cj(v[i]) * Map<const PlainVector>(u,rows);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#endif // EIGEN_GENERAL_RANK1UPDATE_H
|
57
blas/Rank2Update.h
Normal file
57
blas/Rank2Update.h
Normal file
@ -0,0 +1,57 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Chen-Pang He <jdh8@ms63.hinet.net>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_RANK2UPDATE_H
|
||||
#define EIGEN_RANK2UPDATE_H
|
||||
|
||||
namespace internal {
|
||||
|
||||
/* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu'
|
||||
* This is the low-level version of SelfadjointRank2Update.h
|
||||
*/
|
||||
template<typename Scalar, typename Index, int UpLo>
|
||||
struct rank2_update_selector;
|
||||
|
||||
template<typename Scalar, typename Index>
|
||||
struct rank2_update_selector<Scalar,Index,Upper>
|
||||
{
|
||||
static void run(Index size, Scalar* mat, Index stride, const Scalar* _u, const Scalar* _v, Scalar alpha)
|
||||
{
|
||||
typedef Matrix<Scalar,Dynamic,1> PlainVector;
|
||||
Map<const PlainVector> u(_u, size), v(_v, size);
|
||||
|
||||
for (Index i=0; i<size; ++i)
|
||||
{
|
||||
Map<PlainVector>(mat+stride*i, i+1) +=
|
||||
conj(alpha) * conj(_u[i]) * v.head(i+1)
|
||||
+ alpha * conj(_v[i]) * u.head(i+1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar, typename Index>
|
||||
struct rank2_update_selector<Scalar,Index,Lower>
|
||||
{
|
||||
static void run(Index size, Scalar* mat, Index stride, const Scalar* _u, const Scalar* _v, Scalar alpha)
|
||||
{
|
||||
typedef Matrix<Scalar,Dynamic,1> PlainVector;
|
||||
Map<const PlainVector> u(_u, size), v(_v, size);
|
||||
|
||||
for (Index i=0; i<size; ++i)
|
||||
{
|
||||
Map<PlainVector>(mat+(stride+1)*i, size-i) +=
|
||||
conj(alpha) * conj(_u[i]) * v.tail(size-i)
|
||||
+ alpha * conj(_v[i]) * u.tail(size-i);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#endif // EIGEN_RANK2UPDATE_H
|
@ -74,6 +74,8 @@ inline bool check_uplo(const char* uplo)
|
||||
|
||||
namespace Eigen {
|
||||
#include "BandTriangularSolver.h"
|
||||
#include "GeneralRank1Update.h"
|
||||
#include "Rank2Update.h"
|
||||
}
|
||||
|
||||
using namespace Eigen;
|
||||
|
@ -117,6 +117,21 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa
|
||||
*/
|
||||
int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda)
|
||||
{
|
||||
typedef void (*functype)(int, Scalar*, int, const Scalar*, Scalar);
|
||||
static functype func[2];
|
||||
|
||||
static bool init = false;
|
||||
if(!init)
|
||||
{
|
||||
for(int k=0; k<2; ++k)
|
||||
func[k] = 0;
|
||||
|
||||
func[UP] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run);
|
||||
func[LO] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run);
|
||||
|
||||
init = true;
|
||||
}
|
||||
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
||||
RealScalar alpha = *reinterpret_cast<RealScalar*>(palpha);
|
||||
@ -134,16 +149,11 @@ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
|
||||
|
||||
Scalar* x_cpy = get_compact_vector(x, *n, *incx);
|
||||
|
||||
// TODO perform direct calls to underlying implementation
|
||||
// if(UPLO(*uplo)==LO) matrix(a,*n,*n,*lda).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n), alpha);
|
||||
// else if(UPLO(*uplo)==UP) matrix(a,*n,*n,*lda).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n), alpha);
|
||||
int code = UPLO(*uplo);
|
||||
if(code>=2 || func[code]==0)
|
||||
return 0;
|
||||
|
||||
if(UPLO(*uplo)==LO)
|
||||
for(int j=0;j<*n;++j)
|
||||
matrix(a,*n,*n,*lda).col(j).tail(*n-j) += alpha * internal::conj(x_cpy[j]) * vector(x_cpy+j,*n-j);
|
||||
else
|
||||
for(int j=0;j<*n;++j)
|
||||
matrix(a,*n,*n,*lda).col(j).head(j+1) += alpha * internal::conj(x_cpy[j]) * vector(x_cpy,j+1);
|
||||
func[code](*n, a, *lda, x_cpy, alpha);
|
||||
|
||||
matrix(a,*n,*n,*lda).diagonal().imag().setZero();
|
||||
|
||||
@ -161,6 +171,21 @@ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
|
||||
*/
|
||||
int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda)
|
||||
{
|
||||
typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar);
|
||||
static functype func[2];
|
||||
|
||||
static bool init = false;
|
||||
if(!init)
|
||||
{
|
||||
for(int k=0; k<2; ++k)
|
||||
func[k] = 0;
|
||||
|
||||
func[UP] = (internal::rank2_update_selector<Scalar,int,Upper>::run);
|
||||
func[LO] = (internal::rank2_update_selector<Scalar,int,Lower>::run);
|
||||
|
||||
init = true;
|
||||
}
|
||||
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
||||
@ -181,9 +206,11 @@ int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
|
||||
Scalar* x_cpy = get_compact_vector(x, *n, *incx);
|
||||
Scalar* y_cpy = get_compact_vector(y, *n, *incy);
|
||||
|
||||
// TODO perform direct calls to underlying implementation
|
||||
if(UPLO(*uplo)==LO) matrix(a,*n,*n,*lda).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha);
|
||||
else if(UPLO(*uplo)==UP) matrix(a,*n,*n,*lda).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n),vector(y_cpy,*n),alpha);
|
||||
int code = UPLO(*uplo);
|
||||
if(code>=2 || func[code]==0)
|
||||
return 0;
|
||||
|
||||
func[code](*n, a, *lda, x_cpy, y_cpy, alpha);
|
||||
|
||||
matrix(a,*n,*n,*lda).diagonal().imag().setZero();
|
||||
|
||||
@ -222,8 +249,7 @@ int EIGEN_BLAS_FUNC(geru)(int *m, int *n, RealScalar *palpha, RealScalar *px, in
|
||||
Scalar* x_cpy = get_compact_vector(x,*m,*incx);
|
||||
Scalar* y_cpy = get_compact_vector(y,*n,*incy);
|
||||
|
||||
// TODO perform direct calls to underlying implementation
|
||||
matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).transpose();
|
||||
internal::general_rank1_update<Scalar,int,false>::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha);
|
||||
|
||||
if(x_cpy!=x) delete[] x_cpy;
|
||||
if(y_cpy!=y) delete[] y_cpy;
|
||||
@ -260,8 +286,7 @@ int EIGEN_BLAS_FUNC(gerc)(int *m, int *n, RealScalar *palpha, RealScalar *px, in
|
||||
Scalar* x_cpy = get_compact_vector(x,*m,*incx);
|
||||
Scalar* y_cpy = get_compact_vector(y,*n,*incy);
|
||||
|
||||
// TODO perform direct calls to underlying implementation
|
||||
matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).adjoint();
|
||||
internal::general_rank1_update<Scalar,int,Conj>::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha);
|
||||
|
||||
if(x_cpy!=x) delete[] x_cpy;
|
||||
if(y_cpy!=y) delete[] y_cpy;
|
||||
|
@ -49,7 +49,8 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca
|
||||
|
||||
int actual_m = *m;
|
||||
int actual_n = *n;
|
||||
if(OP(*opa)!=NOTR)
|
||||
int code = OP(*opa);
|
||||
if(code!=NOTR)
|
||||
std::swap(actual_m,actual_n);
|
||||
|
||||
Scalar* actual_b = get_compact_vector(b,actual_n,*incb);
|
||||
@ -61,7 +62,9 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca
|
||||
else vector(actual_c, actual_m) *= beta;
|
||||
}
|
||||
|
||||
int code = OP(*opa);
|
||||
if(code>=4 || func[code]==0)
|
||||
return 0;
|
||||
|
||||
func[code](actual_m, actual_n, a, *lda, actual_b, 1, actual_c, 1, alpha);
|
||||
|
||||
if(actual_b!=b) delete[] actual_b;
|
||||
@ -416,42 +419,3 @@ int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, Real
|
||||
// return 1;
|
||||
// }
|
||||
|
||||
/** DGER performs the rank 1 operation
|
||||
*
|
||||
* A := alpha*x*y' + A,
|
||||
*
|
||||
* where alpha is a scalar, x is an m element vector, y is an n element
|
||||
* vector and A is an m by n matrix.
|
||||
*/
|
||||
int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *pa, int *lda)
|
||||
{
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
|
||||
int info = 0;
|
||||
if(*m<0) info = 1;
|
||||
else if(*n<0) info = 2;
|
||||
else if(*incx==0) info = 5;
|
||||
else if(*incy==0) info = 7;
|
||||
else if(*lda<std::max(1,*m)) info = 9;
|
||||
if(info)
|
||||
return xerbla_(SCALAR_SUFFIX_UP"GER ",&info,6);
|
||||
|
||||
if(alpha==Scalar(0))
|
||||
return 1;
|
||||
|
||||
Scalar* x_cpy = get_compact_vector(x,*m,*incx);
|
||||
Scalar* y_cpy = get_compact_vector(y,*n,*incy);
|
||||
|
||||
// TODO perform direct calls to underlying implementation
|
||||
matrix(a,*m,*n,*lda) += alpha * vector(x_cpy,*m) * vector(y_cpy,*n).adjoint();
|
||||
|
||||
if(x_cpy!=x) delete[] x_cpy;
|
||||
if(y_cpy!=y) delete[] y_cpy;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -68,6 +68,20 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
|
||||
|
||||
// init = true;
|
||||
// }
|
||||
typedef void (*functype)(int, Scalar*, int, const Scalar*, Scalar);
|
||||
static functype func[2];
|
||||
|
||||
static bool init = false;
|
||||
if(!init)
|
||||
{
|
||||
for(int k=0; k<2; ++k)
|
||||
func[k] = 0;
|
||||
|
||||
func[UP] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run);
|
||||
func[LO] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run);
|
||||
|
||||
init = true;
|
||||
}
|
||||
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* c = reinterpret_cast<Scalar*>(pc);
|
||||
@ -86,18 +100,11 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
|
||||
// if the increment is not 1, let's copy it to a temporary vector to enable vectorization
|
||||
Scalar* x_cpy = get_compact_vector(x,*n,*incx);
|
||||
|
||||
Matrix<Scalar,Dynamic,Dynamic> m2(matrix(c,*n,*n,*ldc));
|
||||
int code = UPLO(*uplo);
|
||||
if(code>=2 || func[code]==0)
|
||||
return 0;
|
||||
|
||||
// TODO check why this is not accurate enough for lapack tests
|
||||
// if(UPLO(*uplo)==LO) matrix(c,*n,*n,*ldc).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n), alpha);
|
||||
// else if(UPLO(*uplo)==UP) matrix(c,*n,*n,*ldc).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n), alpha);
|
||||
|
||||
if(UPLO(*uplo)==LO)
|
||||
for(int j=0;j<*n;++j)
|
||||
matrix(c,*n,*n,*ldc).col(j).tail(*n-j) += alpha * x_cpy[j] * vector(x_cpy+j,*n-j);
|
||||
else
|
||||
for(int j=0;j<*n;++j)
|
||||
matrix(c,*n,*n,*ldc).col(j).head(j+1) += alpha * x_cpy[j] * vector(x_cpy,j+1);
|
||||
func[code](*n, c, *ldc, x_cpy, alpha);
|
||||
|
||||
if(x_cpy!=x) delete[] x_cpy;
|
||||
|
||||
@ -121,6 +128,20 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
|
||||
//
|
||||
// init = true;
|
||||
// }
|
||||
typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar);
|
||||
static functype func[2];
|
||||
|
||||
static bool init = false;
|
||||
if(!init)
|
||||
{
|
||||
for(int k=0; k<2; ++k)
|
||||
func[k] = 0;
|
||||
|
||||
func[UP] = (internal::rank2_update_selector<Scalar,int,Upper>::run);
|
||||
func[LO] = (internal::rank2_update_selector<Scalar,int,Lower>::run);
|
||||
|
||||
init = true;
|
||||
}
|
||||
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
@ -142,9 +163,11 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
|
||||
Scalar* x_cpy = get_compact_vector(x,*n,*incx);
|
||||
Scalar* y_cpy = get_compact_vector(y,*n,*incy);
|
||||
|
||||
// TODO perform direct calls to underlying implementation
|
||||
if(UPLO(*uplo)==LO) matrix(c,*n,*n,*ldc).selfadjointView<Lower>().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha);
|
||||
else if(UPLO(*uplo)==UP) matrix(c,*n,*n,*ldc).selfadjointView<Upper>().rankUpdate(vector(x_cpy,*n), vector(y_cpy,*n), alpha);
|
||||
int code = UPLO(*uplo);
|
||||
if(code>=2 || func[code]==0)
|
||||
return 0;
|
||||
|
||||
func[code](*n, c, *ldc, x_cpy, y_cpy, alpha);
|
||||
|
||||
if(x_cpy!=x) delete[] x_cpy;
|
||||
if(y_cpy!=y) delete[] y_cpy;
|
||||
@ -208,3 +231,41 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
|
||||
// return 1;
|
||||
// }
|
||||
|
||||
/** DGER performs the rank 1 operation
|
||||
*
|
||||
* A := alpha*x*y' + A,
|
||||
*
|
||||
* where alpha is a scalar, x is an m element vector, y is an n element
|
||||
* vector and A is an m by n matrix.
|
||||
*/
|
||||
int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *py, int *incy, Scalar *pa, int *lda)
|
||||
{
|
||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||
Scalar* y = reinterpret_cast<Scalar*>(py);
|
||||
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
||||
Scalar alpha = *reinterpret_cast<Scalar*>(palpha);
|
||||
|
||||
int info = 0;
|
||||
if(*m<0) info = 1;
|
||||
else if(*n<0) info = 2;
|
||||
else if(*incx==0) info = 5;
|
||||
else if(*incy==0) info = 7;
|
||||
else if(*lda<std::max(1,*m)) info = 9;
|
||||
if(info)
|
||||
return xerbla_(SCALAR_SUFFIX_UP"GER ",&info,6);
|
||||
|
||||
if(alpha==Scalar(0))
|
||||
return 1;
|
||||
|
||||
Scalar* x_cpy = get_compact_vector(x,*m,*incx);
|
||||
Scalar* y_cpy = get_compact_vector(y,*n,*incy);
|
||||
|
||||
internal::general_rank1_update<Scalar,int,false>::run(*m, *n, a, *lda, x_cpy, y_cpy, alpha);
|
||||
|
||||
if(x_cpy!=x) delete[] x_cpy;
|
||||
if(y_cpy!=y) delete[] y_cpy;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -305,6 +305,7 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
|
||||
int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc)
|
||||
{
|
||||
// std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
|
||||
#if !ISCOMPLEX
|
||||
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar);
|
||||
static functype func[8];
|
||||
|
||||
@ -324,6 +325,7 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
|
||||
|
||||
init = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
||||
Scalar* c = reinterpret_cast<Scalar*>(pc);
|
||||
|
Loading…
x
Reference in New Issue
Block a user