Mirror of https://gitlab.com/libeigen/eigen.git, synced 2025-08-11 03:09:01 +08:00
Fix "routine is both "inline" and "noinline"" warnings
parent e5bf4440c0
commit 3930c9b086
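Every change below follows the same pattern. Functions tagged EIGEN_DONT_INLINE were defined inside their class templates, which makes them implicitly inline, so compilers that honour the attribute (ICC in particular) warn that the routine is both "inline" and "noinline". The fix keeps only the declaration in the class and moves the EIGEN_DONT_INLINE definition out of line; in the thin MKL wrapper macros the attribute is simply dropped. A minimal sketch of the pattern, assuming a generic NOINLINE macro in place of EIGEN_DONT_INLINE (not Eigen's actual code):

#include <cstdio>

#if defined(_MSC_VER)
  #define NOINLINE __declspec(noinline)
#else
  #define NOINLINE __attribute__((noinline))
#endif

template<typename T>
struct kernel
{
  // Before the fix the whole definition sat here, making run() implicitly
  // inline while also being marked noinline -- the clash behind the warning.
  static NOINLINE void run(T x);   // declaration only now
};

// The noinline definition moves out of the class, so it is no longer
// implicitly inline and the warning disappears.
template<typename T>
NOINLINE void kernel<T>::run(T x)
{
  std::printf("%f\n", static_cast<double>(x));
}

int main()
{
  kernel<float>::run(1.5f);   // call sites are unchanged
  return 0;
}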
@@ -222,7 +222,29 @@ class GeneralProduct<Lhs, Rhs, InnerProduct>
 ***********************************************************************/
 
 namespace internal {
-template<int StorageOrder> struct outer_product_selector;
+
+// Column major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
+{
+  typedef typename Dest::Index Index;
+  // FIXME make sure lhs is sequentially stored
+  // FIXME not very good if rhs is real and lhs complex while alpha is real too
+  const Index cols = dest.cols();
+  for (Index j=0; j<cols; ++j)
+    func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
+}
+
+// Row major
+template<typename ProductType, typename Dest, typename Func>
+EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
+  typedef typename Dest::Index Index;
+  // FIXME make sure rhs is sequentially stored
+  // FIXME not very good if lhs is real and rhs complex while alpha is real too
+  const Index rows = dest.rows();
+  for (Index i=0; i<rows; ++i)
+    func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
+}
 
 template<typename Lhs, typename Rhs>
 struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
@@ -235,6 +257,8 @@ template<typename Lhs, typename Rhs>
 class GeneralProduct<Lhs, Rhs, OuterProduct>
   : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
 {
+    template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+
   public:
     EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
 
@@ -257,53 +281,25 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
 
     template<typename Dest>
     inline void evalTo(Dest& dest) const {
-      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, set());
+      internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
     }
 
     template<typename Dest>
     inline void addTo(Dest& dest) const {
-      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, add());
+      internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
     }
 
     template<typename Dest>
     inline void subTo(Dest& dest) const {
-      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, sub());
+      internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
     }
 
     template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
     {
-      internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, adds(alpha));
+      internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
     }
 };
 
-namespace internal {
-
-template<> struct outer_product_selector<ColMajor> {
-  template<typename ProductType, typename Dest, typename Func>
-  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, const Func& func) {
-    typedef typename Dest::Index Index;
-    // FIXME make sure lhs is sequentially stored
-    // FIXME not very good if rhs is real and lhs complex while alpha is real too
-    const Index cols = dest.cols();
-    for (Index j=0; j<cols; ++j)
-      func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
-  }
-};
-
-template<> struct outer_product_selector<RowMajor> {
-  template<typename ProductType, typename Dest, typename Func>
-  static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, const Func& func) {
-    typedef typename Dest::Index Index;
-    // FIXME make sure rhs is sequentially stored
-    // FIXME not very good if lhs is real and rhs complex while alpha is real too
-    const Index rows = dest.rows();
-    for (Index i=0; i<rows; ++i)
-      func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
-  }
-};
-
-} // end namespace internal
-
 /***********************************************************************
 * Implementation of General Matrix Vector Product
 ***********************************************************************/
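A side effect in the outer-product hunk above: the old code selected a kernel through a class-template specialization, outer_product_selector<ColMajor/RowMajor>::run, while the new code picks an outer_product_selector_run overload through a true_type/false_type tag built by IsRowMajor<Dest>. A stripped-down sketch of that tag-dispatch idiom, using the standard-library tags rather than Eigen's internal ones (hypothetical names, not Eigen's code):

#include <iostream>
#include <type_traits>

// Hypothetical stand-ins for the two kernels above; the real functions fill
// the destination column-by-column or row-by-row.
void outer_fill(const std::false_type&) { std::cout << "column-major kernel\n"; }
void outer_fill(const std::true_type&)  { std::cout << "row-major kernel\n"; }

template<bool IsRowMajor>
void eval_outer_product()
{
  // The compile-time boolean becomes a tag object; overload resolution then
  // selects the kernel, so each overload is an ordinary namespace-scope
  // function that can carry the noinline attribute exactly once.
  outer_fill(std::integral_constant<bool, IsRowMajor>());
}

int main()
{
  eval_outer_product<false>();  // column-major kernel
  eval_outer_product<true>();   // row-major kernel
  return 0;
}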
@@ -529,7 +529,14 @@ struct gebp_kernel
 
   EIGEN_DONT_INLINE
   void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
-                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
+                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB=0);
+};
+
+template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE
+void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
+  ::operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
+               Index strideA, Index strideB, Index offsetA, Index offsetB, RhsScalar* unpackedB)
 {
   Traits traits;
 
@@ -1089,7 +1096,7 @@ EIGEN_ASM_COMMENT("mybegin4");
       }
     }
   }
-};
+}
 
 #undef CJMADD
 
@@ -1110,81 +1117,84 @@ EIGEN_ASM_COMMENT("mybegin4");
 template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
 struct gemm_pack_lhs
 {
-  EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
-                                    Index stride=0, Index offset=0)
-  {
-    typedef typename packet_traits<Scalar>::type Packet;
-    enum { PacketSize = packet_traits<Scalar>::size };
-
-    EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
-    eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
-    eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
-    conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
-    const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
-    Index count = 0;
-    Index peeled_mc = (rows/Pack1)*Pack1;
-    for(Index i=0; i<peeled_mc; i+=Pack1)
-    {
-      if(PanelMode) count += Pack1 * offset;
-
-      if(StorageOrder==ColMajor)
-      {
-        for(Index k=0; k<depth; k++)
-        {
-          Packet A, B, C, D;
-          if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
-          if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
-          if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
-          if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
-          if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
-          if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
-          if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
-          if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
-        }
-      }
-      else
-      {
-        for(Index k=0; k<depth; k++)
-        {
-          // TODO add a vectorized transpose here
-          Index w=0;
-          for(; w<Pack1-3; w+=4)
-          {
-            Scalar a(cj(lhs(i+w+0, k))),
-                   b(cj(lhs(i+w+1, k))),
-                   c(cj(lhs(i+w+2, k))),
-                   d(cj(lhs(i+w+3, k)));
-            blockA[count++] = a;
-            blockA[count++] = b;
-            blockA[count++] = c;
-            blockA[count++] = d;
-          }
-          if(Pack1%4)
-            for(;w<Pack1;++w)
-              blockA[count++] = cj(lhs(i+w, k));
-        }
-      }
-      if(PanelMode) count += Pack1 * (stride-offset-depth);
-    }
-    if(rows-peeled_mc>=Pack2)
-    {
-      if(PanelMode) count += Pack2*offset;
-      for(Index k=0; k<depth; k++)
-        for(Index w=0; w<Pack2; w++)
-          blockA[count++] = cj(lhs(peeled_mc+w, k));
-      if(PanelMode) count += Pack2 * (stride-offset-depth);
-      peeled_mc += Pack2;
-    }
-    for(Index i=peeled_mc; i<rows; i++)
-    {
-      if(PanelMode) count += offset;
-      for(Index k=0; k<depth; k++)
-        blockA[count++] = cj(lhs(i, k));
-      if(PanelMode) count += (stride-offset-depth);
-    }
-  }
+  EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0);
 };
 
+template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, StorageOrder, Conjugate, PanelMode>
+  ::operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride, Index offset)
+{
+  typedef typename packet_traits<Scalar>::type Packet;
+  enum { PacketSize = packet_traits<Scalar>::size };
+
+  EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+  eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
+  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+  const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
+  Index count = 0;
+  Index peeled_mc = (rows/Pack1)*Pack1;
+  for(Index i=0; i<peeled_mc; i+=Pack1)
+  {
+    if(PanelMode) count += Pack1 * offset;
+
+    if(StorageOrder==ColMajor)
+    {
+      for(Index k=0; k<depth; k++)
+      {
+        Packet A, B, C, D;
+        if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
+        if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
+        if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
+        if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
+        if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
+        if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
+        if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
+        if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
+      }
+    }
+    else
+    {
+      for(Index k=0; k<depth; k++)
+      {
+        // TODO add a vectorized transpose here
+        Index w=0;
+        for(; w<Pack1-3; w+=4)
+        {
+          Scalar a(cj(lhs(i+w+0, k))),
+                 b(cj(lhs(i+w+1, k))),
+                 c(cj(lhs(i+w+2, k))),
+                 d(cj(lhs(i+w+3, k)));
+          blockA[count++] = a;
+          blockA[count++] = b;
+          blockA[count++] = c;
+          blockA[count++] = d;
+        }
+        if(Pack1%4)
+          for(;w<Pack1;++w)
+            blockA[count++] = cj(lhs(i+w, k));
+      }
+    }
+    if(PanelMode) count += Pack1 * (stride-offset-depth);
+  }
+  if(rows-peeled_mc>=Pack2)
+  {
+    if(PanelMode) count += Pack2*offset;
+    for(Index k=0; k<depth; k++)
+      for(Index w=0; w<Pack2; w++)
+        blockA[count++] = cj(lhs(peeled_mc+w, k));
+    if(PanelMode) count += Pack2 * (stride-offset-depth);
+    peeled_mc += Pack2;
+  }
+  for(Index i=peeled_mc; i<rows; i++)
+  {
+    if(PanelMode) count += offset;
+    for(Index k=0; k<depth; k++)
+      blockA[count++] = cj(lhs(i, k));
+    if(PanelMode) count += (stride-offset-depth);
+  }
+}
+
 // copy a complete panel of the rhs
 // this version is optimized for column major matrices
 // The traversal order is as follow: (nr==4):
@@ -1197,93 +1207,99 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
 {
   typedef typename packet_traits<Scalar>::type Packet;
   enum { PacketSize = packet_traits<Scalar>::size };
-  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
-                                    Index stride=0, Index offset=0)
-  {
-    EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
-    eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
-    conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
-    Index packet_cols = (cols/nr) * nr;
-    Index count = 0;
-    for(Index j2=0; j2<packet_cols; j2+=nr)
-    {
-      // skip what we have before
-      if(PanelMode) count += nr * offset;
-      const Scalar* b0 = &rhs[(j2+0)*rhsStride];
-      const Scalar* b1 = &rhs[(j2+1)*rhsStride];
-      const Scalar* b2 = &rhs[(j2+2)*rhsStride];
-      const Scalar* b3 = &rhs[(j2+3)*rhsStride];
-      for(Index k=0; k<depth; k++)
-      {
-        blockB[count+0] = cj(b0[k]);
-        blockB[count+1] = cj(b1[k]);
-        if(nr==4) blockB[count+2] = cj(b2[k]);
-        if(nr==4) blockB[count+3] = cj(b3[k]);
-        count += nr;
-      }
-      // skip what we have after
-      if(PanelMode) count += nr * (stride-offset-depth);
-    }
-
-    // copy the remaining columns one at a time (nr==1)
-    for(Index j2=packet_cols; j2<cols; ++j2)
-    {
-      if(PanelMode) count += offset;
-      const Scalar* b0 = &rhs[(j2+0)*rhsStride];
-      for(Index k=0; k<depth; k++)
-      {
-        blockB[count] = cj(b0[k]);
-        count += 1;
-      }
-      if(PanelMode) count += (stride-offset-depth);
-    }
-  }
+  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
 };
 
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
+  ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+  EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+  Index packet_cols = (cols/nr) * nr;
+  Index count = 0;
+  for(Index j2=0; j2<packet_cols; j2+=nr)
+  {
+    // skip what we have before
+    if(PanelMode) count += nr * offset;
+    const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+    const Scalar* b1 = &rhs[(j2+1)*rhsStride];
+    const Scalar* b2 = &rhs[(j2+2)*rhsStride];
+    const Scalar* b3 = &rhs[(j2+3)*rhsStride];
+    for(Index k=0; k<depth; k++)
+    {
+      blockB[count+0] = cj(b0[k]);
+      blockB[count+1] = cj(b1[k]);
+      if(nr==4) blockB[count+2] = cj(b2[k]);
+      if(nr==4) blockB[count+3] = cj(b3[k]);
+      count += nr;
+    }
+    // skip what we have after
+    if(PanelMode) count += nr * (stride-offset-depth);
+  }
+
+  // copy the remaining columns one at a time (nr==1)
+  for(Index j2=packet_cols; j2<cols; ++j2)
+  {
+    if(PanelMode) count += offset;
+    const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+    for(Index k=0; k<depth; k++)
+    {
+      blockB[count] = cj(b0[k]);
+      count += 1;
+    }
+    if(PanelMode) count += (stride-offset-depth);
+  }
+}
+
 // this version is optimized for row major matrices
 template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
 struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
 {
   enum { PacketSize = packet_traits<Scalar>::size };
-  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
-                                    Index stride=0, Index offset=0)
-  {
-    EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
-    eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
-    conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
-    Index packet_cols = (cols/nr) * nr;
-    Index count = 0;
-    for(Index j2=0; j2<packet_cols; j2+=nr)
-    {
-      // skip what we have before
-      if(PanelMode) count += nr * offset;
-      for(Index k=0; k<depth; k++)
-      {
-        const Scalar* b0 = &rhs[k*rhsStride + j2];
-        blockB[count+0] = cj(b0[0]);
-        blockB[count+1] = cj(b0[1]);
-        if(nr==4) blockB[count+2] = cj(b0[2]);
-        if(nr==4) blockB[count+3] = cj(b0[3]);
-        count += nr;
-      }
-      // skip what we have after
-      if(PanelMode) count += nr * (stride-offset-depth);
-    }
-    // copy the remaining columns one at a time (nr==1)
-    for(Index j2=packet_cols; j2<cols; ++j2)
-    {
-      if(PanelMode) count += offset;
-      const Scalar* b0 = &rhs[j2];
-      for(Index k=0; k<depth; k++)
-      {
-        blockB[count] = cj(b0[k*rhsStride]);
-        count += 1;
-      }
-      if(PanelMode) count += stride-offset-depth;
-    }
-  }
+  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
 };
 
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
+  ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+  EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+  Index packet_cols = (cols/nr) * nr;
+  Index count = 0;
+  for(Index j2=0; j2<packet_cols; j2+=nr)
+  {
+    // skip what we have before
+    if(PanelMode) count += nr * offset;
+    for(Index k=0; k<depth; k++)
+    {
+      const Scalar* b0 = &rhs[k*rhsStride + j2];
+      blockB[count+0] = cj(b0[0]);
+      blockB[count+1] = cj(b0[1]);
+      if(nr==4) blockB[count+2] = cj(b0[2]);
+      if(nr==4) blockB[count+3] = cj(b0[3]);
+      count += nr;
+    }
+    // skip what we have after
+    if(PanelMode) count += nr * (stride-offset-depth);
+  }
+  // copy the remaining columns one at a time (nr==1)
+  for(Index j2=packet_cols; j2<cols; ++j2)
+  {
+    if(PanelMode) count += offset;
+    const Scalar* b0 = &rhs[j2];
+    for(Index k=0; k<depth; k++)
+    {
+      blockB[count] = cj(b0[k*rhsStride]);
+      count += 1;
+    }
+    if(PanelMode) count += stride-offset-depth;
+  }
+}
+
 } // end namespace internal
 
 /** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
@@ -50,6 +50,7 @@ template<
           typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
 struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
 {
+
 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
 static void run(Index rows, Index cols, Index depth,
   const LhsScalar* _lhs, Index lhsStride,
@@ -169,7 +170,6 @@ static void run(Index rows, Index cols, Index depth,
     // vertical panel which is, in practice, a very low number.
     pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
 
-
     // For each mc x kc block of the lhs's vertical panel...
     // (==GEPP_VAR1)
     for(Index i2=0; i2<rows; i2+=mc)
@@ -183,7 +183,6 @@ static void run(Index rows, Index cols, Index depth,
 
       // Everything is packed, we can now call the block * panel kernel:
       gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
-
     }
   }
 }
@@ -49,6 +49,18 @@ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
 typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
 
 EIGEN_DONT_INLINE static void run(
+  Index rows, Index cols,
+  const LhsScalar* lhs, Index lhsStride,
+  const RhsScalar* rhs, Index rhsIncr,
+  ResScalar* res, Index
+  #ifdef EIGEN_INTERNAL_DEBUGGING
+    resIncr
+  #endif
+  , RhsScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
   Index rows, Index cols,
   const LhsScalar* lhs, Index lhsStride,
   const RhsScalar* rhs, Index rhsIncr,
@@ -274,7 +286,6 @@ EIGEN_DONT_INLINE static void run(
   } while(Vectorizable);
   #undef _EIGEN_ACCUMULATE_PACKETS
 }
-};
 
 /* Optimized row-major matrix * vector product:
  * This algorithm processes 4 rows at onces that allows to both reduce
@@ -308,6 +319,15 @@ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
 typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
 
 EIGEN_DONT_INLINE static void run(
+  Index rows, Index cols,
+  const LhsScalar* lhs, Index lhsStride,
+  const RhsScalar* rhs, Index rhsIncr,
+  ResScalar* res, Index resIncr,
+  ResScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
   Index rows, Index cols,
   const LhsScalar* lhs, Index lhsStride,
   const RhsScalar* rhs, Index rhsIncr,
@@ -545,7 +565,6 @@ EIGEN_DONT_INLINE static void run(
 
   #undef _EIGEN_ACCUMULATE_PACKETS
 }
-};
 
 } // end namespace internal
 
@@ -53,7 +53,7 @@ struct general_matrix_vector_product_gemv :
 #define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
 template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
 struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
   Index rows, Index cols, \
   const Scalar* lhs, Index lhsStride, \
   const Scalar* rhs, Index rhsIncr, \
@@ -70,7 +70,7 @@ static EIGEN_DONT_INLINE void run( \
 }; \
 template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
 struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
   Index rows, Index cols, \
   const Scalar* lhs, Index lhsStride, \
   const Scalar* rhs, Index rhsIncr, \
@@ -92,7 +92,7 @@ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,Conjugat
 { \
 typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
 \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
   Index rows, Index cols, \
   const EIGTYPE* lhs, Index lhsStride, \
   const EIGTYPE* rhs, Index rhsIncr, \
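In the MKL specializations, by contrast, run() stays defined inside the macro-generated class and only loses the EIGEN_DONT_INLINE tag, presumably because these bodies are thin wrappers that forward to the vendor routine. A hedged illustration of that shape (the names and the stub are invented, not Eigen's or MKL's):

#include <cstdio>

// Invented stand-in for the vendor call the real wrapper forwards to.
static void vendor_gemv(int rows, int cols, const double* lhs, const double* rhs, double* res)
{
  std::printf("delegating a %dx%d matrix-vector product\n", rows, cols);
  (void)lhs; (void)rhs; if (res) *res = 0.0;
}

template<typename Scalar> struct gemv_impl;  // generic (built-in) path lives elsewhere

template<> struct gemv_impl<double>
{
  // Was "static EIGEN_DONT_INLINE void run(...)": defined in-class it is
  // implicitly inline, so the noinline hint is dropped instead of moved.
  static void run(int rows, int cols, const double* lhs, const double* rhs, double* res)
  {
    vendor_gemv(rows, cols, lhs, rhs, res);  // all the work happens in the vendor kernel
  }
};

int main()
{
  double acc = 0.0;
  gemv_impl<double>::run(4, 4, 0, 0, &acc);
  return 0;
}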
@@ -230,6 +230,17 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
 {
 
   static EIGEN_DONT_INLINE void run(
+    Index rows, Index cols,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* res, Index resStride,
+    const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
     Index rows, Index cols,
     const Scalar* _lhs, Index lhsStride,
     const Scalar* _rhs, Index rhsStride,
@@ -301,7 +312,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
       }
     }
   }
-};
 
 // matrix * selfadjoint product
 template <typename Scalar, typename Index,
@@ -311,6 +321,17 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
 {
 
   static EIGEN_DONT_INLINE void run(
+    Index rows, Index cols,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* res, Index resStride,
+    const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
     Index rows, Index cols,
     const Scalar* _lhs, Index lhsStride,
     const Scalar* _rhs, Index rhsStride,
@@ -353,7 +374,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
       }
     }
   }
-};
 
 } // end namespace internal
 
@@ -23,7 +23,7 @@
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+ //
 ********************************************************************************
 * Content : Eigen bindings to Intel(R) MKL
 * Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
@@ -47,7 +47,7 @@ template <typename Index, \
 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
 {\
 \
-  static EIGEN_DONT_INLINE void run( \
+  static void run( \
     Index rows, Index cols, \
     const EIGTYPE* _lhs, Index lhsStride, \
     const EIGTYPE* _rhs, Index rhsStride, \
@@ -98,7 +98,7 @@ template <typename Index, \
           int RhsStorageOrder, bool ConjugateRhs> \
 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
 {\
-  static EIGEN_DONT_INLINE void run( \
+  static void run( \
     Index rows, Index cols, \
     const EIGTYPE* _lhs, Index lhsStride, \
     const EIGTYPE* _rhs, Index rhsStride, \
@@ -174,7 +174,7 @@ template <typename Index, \
 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
 {\
 \
-  static EIGEN_DONT_INLINE void run( \
+  static void run( \
     Index rows, Index cols, \
     const EIGTYPE* _lhs, Index lhsStride, \
     const EIGTYPE* _rhs, Index rhsStride, \
@@ -224,7 +224,7 @@ template <typename Index, \
           int RhsStorageOrder, bool ConjugateRhs> \
 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
 {\
-  static EIGEN_DONT_INLINE void run( \
+  static void run( \
     Index rows, Index cols, \
     const EIGTYPE* _lhs, Index lhsStride, \
     const EIGTYPE* _rhs, Index rhsStride, \
@@ -28,6 +28,15 @@ struct selfadjoint_matrix_vector_product
 
 {
   static EIGEN_DONT_INLINE void run(
+    Index size,
+    const Scalar* lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsIncr,
+    Scalar* res,
+    Scalar alpha);
+};
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
   Index size,
   const Scalar* lhs, Index lhsStride,
   const Scalar* _rhs, Index rhsIncr,
@@ -153,7 +162,6 @@ static EIGEN_DONT_INLINE void run(
     res[j] += alpha * t2;
   }
 }
-};
 
 } // end namespace internal
 
@@ -50,7 +50,7 @@ struct selfadjoint_matrix_vector_product_symv :
 #define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
 template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
 struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
   Index size, const Scalar* lhs, Index lhsStride, \
   const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
     enum {\
@@ -77,7 +77,7 @@ struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,Co
 { \
 typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
 \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
   Index size, const EIGTYPE* lhs, Index lhsStride, \
   const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
 { \
@@ -96,6 +96,19 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
     const Scalar* _lhs, Index lhsStride,
     const Scalar* _rhs, Index rhsStride,
     Scalar* res, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+                                                        LhsStorageOrder,ConjugateLhs,
+                                                        RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+    Index _rows, Index _cols, Index _depth,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* res, Index resStride,
     const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
 {
   // strip zeros
@@ -203,15 +216,14 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
       }
     }
   }
-};
 
 // implements col-major += alpha * op(general) * op(triangular)
 template <typename Scalar, typename Index, int Mode,
           int LhsStorageOrder, bool ConjugateLhs,
           int RhsStorageOrder, bool ConjugateRhs, int Version>
 struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                                         LhsStorageOrder,ConjugateLhs,
                                         RhsStorageOrder,ConjugateRhs,ColMajor,Version>
 {
   typedef gebp_traits<Scalar,Scalar> Traits;
   enum {
@@ -225,6 +237,19 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
     const Scalar* _lhs, Index lhsStride,
     const Scalar* _rhs, Index rhsStride,
     Scalar* res, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+                                                        LhsStorageOrder,ConjugateLhs,
+                                                        RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+    Index _rows, Index _cols, Index _depth,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* res, Index resStride,
     const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
 {
   // strip zeros
@@ -343,7 +368,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
       }
     }
   }
-};
 
 /***************************************************************************
 * Wrapper to product_triangular_matrix_matrix
@@ -91,7 +91,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
     conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
   }; \
 \
-  static EIGEN_DONT_INLINE void run( \
+  static void run( \
     Index _rows, Index _cols, Index _depth, \
     const EIGTYPE* _lhs, Index lhsStride, \
     const EIGTYPE* _rhs, Index rhsStride, \
@@ -205,7 +205,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
     conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
   }; \
 \
-  static EIGEN_DONT_INLINE void run( \
+  static void run( \
     Index _rows, Index _cols, Index _depth, \
     const EIGTYPE* _lhs, Index lhsStride, \
     const EIGTYPE* _rhs, Index rhsStride, \
@@ -27,7 +27,13 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
     HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
   };
   static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
-                                    const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
+                                    const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+  ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+        const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
 {
   static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
   Index size = (std::min)(_rows,_cols);
@@ -78,7 +84,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
                                    _res, resIncr, alpha);
   }
 }
-};
 
 template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
 struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
@@ -89,8 +94,14 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
     HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
     HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
   };
-  static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
-                  const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
+  static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+                                    const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+  ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+        const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
 {
   static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
   Index diagSize = (std::min)(_rows,_cols);
@@ -141,7 +152,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
                 &res.coeffRef(diagSize), resIncr, alpha);
   }
 }
-};
 
 /***************************************************************************
 * Wrapper to product_triangular_vector
@@ -50,7 +50,7 @@ struct triangular_matrix_vector_product_trmv :
 #define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
 template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
 struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
                                    const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
       triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
         _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -58,7 +58,7 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs
 }; \
 template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
 struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
                                    const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
       triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
         _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -81,8 +81,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
    IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
    LowUp = IsLower ? Lower : Upper \
  }; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
                  const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
  { \
    if (ConjLhs || IsZeroDiag) { \
      triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
@@ -166,8 +166,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
    IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
    LowUp = IsLower ? Lower : Upper \
  }; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
                  const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
  { \
    if (IsZeroDiag) { \
      triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
@@ -18,7 +18,7 @@ namespace internal {
 template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
 struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
 {
-  static EIGEN_DONT_INLINE void run(
+  static void run(
     Index size, Index cols,
     const Scalar* tri, Index triStride,
     Scalar* _other, Index otherStride,
@@ -39,6 +39,13 @@ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStor
 struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
 {
   static EIGEN_DONT_INLINE void run(
+    Index size, Index otherSize,
+    const Scalar* _tri, Index triStride,
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
     Index size, Index otherSize,
     const Scalar* _tri, Index triStride,
     Scalar* _other, Index otherStride,
@@ -173,7 +180,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
       }
     }
   }
-};
 
 /* Optimized triangular solver with multiple left hand sides and the trinagular matrix on the right
  */
@@ -181,6 +187,13 @@ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStor
 struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
 {
   static EIGEN_DONT_INLINE void run(
+    Index size, Index otherSize,
+    const Scalar* _tri, Index triStride,
+    Scalar* _other, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
     Index size, Index otherSize,
     const Scalar* _tri, Index triStride,
     Scalar* _other, Index otherStride,
@@ -308,7 +321,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
       }
     }
   }
-};
 
 } // end namespace internal
 
@@ -48,7 +48,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
     IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
     conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
   }; \
-  static EIGEN_DONT_INLINE void run( \
+  static void run( \
       Index size, Index otherSize, \
       const EIGTYPE* _tri, Index triStride, \
       EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
@ -103,7 +103,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
|
|||||||
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
|
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
|
||||||
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
|
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
|
||||||
}; \
|
}; \
|
||||||
static EIGEN_DONT_INLINE void run( \
|
static void run( \
|
||||||
Index size, Index otherSize, \
|
Index size, Index otherSize, \
|
||||||
const EIGTYPE* _tri, Index triStride, \
|
const EIGTYPE* _tri, Index triStride, \
|
||||||
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
|
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
|
||||||
|
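In the MKL-backed specializations just above, run() is generated by a macro (note the trailing backslashes), so its body cannot easily be moved out of the class; these hunks instead drop EIGEN_DONT_INLINE and let the in-class definition be implicitly inline, which resolves the same warning from the other direction. Roughly, reusing the hypothetical names from the previous sketch:

    struct WidgetMKL
    {
      // Defined in-class, so implicitly inline; with the noinline marker
      // removed there is no longer an inline/noinline conflict.
      static void run(int size)
      {
        // in the real code this is essentially a thin wrapper around an
        // external triangular-solve routine, so forcing noinline matters little
      }
    };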
@ -213,7 +213,7 @@ class SparseMatrix
       * inserted in increasing inner index order, and in O(nnz_j) for a random insertion.
       *
       */
-    EIGEN_DONT_INLINE Scalar& insert(Index row, Index col)
+    Scalar& insert(Index row, Index col)
     {
       if(isCompressed())
       {
@ -434,7 +434,7 @@ class SparseMatrix

     /** \internal
       * same as insert(Index,Index) except that the indices are given relative to the storage order */
-    EIGEN_DONT_INLINE Scalar& insertByOuterInner(Index j, Index i)
+    Scalar& insertByOuterInner(Index j, Index i)
     {
       return insert(IsRowMajor ? j : i, IsRowMajor ? i : j);
     }
@ -711,62 +711,7 @@ class SparseMatrix
     #endif

     template<typename OtherDerived>
-    EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other)
-    {
-      const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
-      if (needToTranspose)
-      {
-        // two passes algorithm:
-        // 1 - compute the number of coeffs per dest inner vector
-        // 2 - do the actual copy/eval
-        // Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed
-        typedef typename internal::nested<OtherDerived,2>::type OtherCopy;
-        typedef typename internal::remove_all<OtherCopy>::type _OtherCopy;
-        OtherCopy otherCopy(other.derived());
-
-        SparseMatrix dest(other.rows(),other.cols());
-        Eigen::Map<Matrix<Index, Dynamic, 1> > (dest.m_outerIndex,dest.outerSize()).setZero();
-
-        // pass 1
-        // FIXME the above copy could be merged with that pass
-        for (Index j=0; j<otherCopy.outerSize(); ++j)
-          for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
-            ++dest.m_outerIndex[it.index()];
-
-        // prefix sum
-        Index count = 0;
-        VectorXi positions(dest.outerSize());
-        for (Index j=0; j<dest.outerSize(); ++j)
-        {
-          Index tmp = dest.m_outerIndex[j];
-          dest.m_outerIndex[j] = count;
-          positions[j] = count;
-          count += tmp;
-        }
-        dest.m_outerIndex[dest.outerSize()] = count;
-        // alloc
-        dest.m_data.resize(count);
-        // pass 2
-        for (Index j=0; j<otherCopy.outerSize(); ++j)
-        {
-          for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
-          {
-            Index pos = positions[it.index()]++;
-            dest.m_data.index(pos) = j;
-            dest.m_data.value(pos) = it.value();
-          }
-        }
-        this->swap(dest);
-        return *this;
-      }
-      else
-      {
-        if(other.isRValue())
-          initAssignment(other.derived());
-        // there is no special optimization
-        return Base::operator=(other.derived());
-      }
-    }
+    EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase<OtherDerived>& other);

     friend std::ostream & operator << (std::ostream & s, const SparseMatrix& m)
     {
@ -836,111 +781,7 @@ protected:

     /** \internal
       * \sa insert(Index,Index) */
-    EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col)
-    {
-      eigen_assert(isCompressed());
-
-      const Index outer = IsRowMajor ? row : col;
-      const Index inner = IsRowMajor ? col : row;
-
-      Index previousOuter = outer;
-      if (m_outerIndex[outer+1]==0)
-      {
-        // we start a new inner vector
-        while (previousOuter>=0 && m_outerIndex[previousOuter]==0)
-        {
-          m_outerIndex[previousOuter] = static_cast<Index>(m_data.size());
-          --previousOuter;
-        }
-        m_outerIndex[outer+1] = m_outerIndex[outer];
-      }
-
-      // here we have to handle the tricky case where the outerIndex array
-      // starts with: [ 0 0 0 0 0 1 ...] and we are inserted in, e.g.,
-      // the 2nd inner vector...
-      bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0))
-                    && (size_t(m_outerIndex[outer+1]) == m_data.size());
-
-      size_t startId = m_outerIndex[outer];
-      // FIXME let's make sure sizeof(long int) == sizeof(size_t)
-      size_t p = m_outerIndex[outer+1];
-      ++m_outerIndex[outer+1];
-
-      float reallocRatio = 1;
-      if (m_data.allocatedSize()<=m_data.size())
-      {
-        // if there is no preallocated memory, let's reserve a minimum of 32 elements
-        if (m_data.size()==0)
-        {
-          m_data.reserve(32);
-        }
-        else
-        {
-          // we need to reallocate the data, to reduce multiple reallocations
-          // we use a smart resize algorithm based on the current filling ratio
-          // in addition, we use float to avoid integers overflows
-          float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1);
-          reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size());
-          // furthermore we bound the realloc ratio to:
-          // 1) reduce multiple minor realloc when the matrix is almost filled
-          // 2) avoid to allocate too much memory when the matrix is almost empty
-          reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f);
-        }
-      }
-      m_data.resize(m_data.size()+1,reallocRatio);
-
-      if (!isLastVec)
-      {
-        if (previousOuter==-1)
-        {
-          // oops wrong guess.
-          // let's correct the outer offsets
-          for (Index k=0; k<=(outer+1); ++k)
-            m_outerIndex[k] = 0;
-          Index k=outer+1;
-          while(m_outerIndex[k]==0)
-            m_outerIndex[k++] = 1;
-          while (k<=m_outerSize && m_outerIndex[k]!=0)
-            m_outerIndex[k++]++;
-          p = 0;
-          --k;
-          k = m_outerIndex[k]-1;
-          while (k>0)
-          {
-            m_data.index(k) = m_data.index(k-1);
-            m_data.value(k) = m_data.value(k-1);
-            k--;
-          }
-        }
-        else
-        {
-          // we are not inserting into the last inner vec
-          // update outer indices:
-          Index j = outer+2;
-          while (j<=m_outerSize && m_outerIndex[j]!=0)
-            m_outerIndex[j++]++;
-          --j;
-          // shift data of last vecs:
-          Index k = m_outerIndex[j]-1;
-          while (k>=Index(p))
-          {
-            m_data.index(k) = m_data.index(k-1);
-            m_data.value(k) = m_data.value(k-1);
-            k--;
-          }
-        }
-      }
-
-      while ( (p > startId) && (m_data.index(p-1) > inner) )
-      {
-        m_data.index(p) = m_data.index(p-1);
-        m_data.value(p) = m_data.value(p-1);
-        --p;
-      }
-
-      m_data.index(p) = inner;
-      return (m_data.value(p) = 0);
-    }
+    EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col);

     /** \internal
       * A vector object that is equal to 0 everywhere but v at the position i */
@ -959,36 +800,7 @@ protected:

     /** \internal
       * \sa insert(Index,Index) */
-    EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col)
-    {
-      eigen_assert(!isCompressed());
-
-      const Index outer = IsRowMajor ? row : col;
-      const Index inner = IsRowMajor ? col : row;
-
-      std::ptrdiff_t room = m_outerIndex[outer+1] - m_outerIndex[outer];
-      std::ptrdiff_t innerNNZ = m_innerNonZeros[outer];
-      if(innerNNZ>=room)
-      {
-        // this inner vector is full, we need to reallocate the whole buffer :(
-        reserve(SingletonVector(outer,std::max<std::ptrdiff_t>(2,innerNNZ)));
-      }
-
-      Index startId = m_outerIndex[outer];
-      Index p = startId + m_innerNonZeros[outer];
-      while ( (p > startId) && (m_data.index(p-1) > inner) )
-      {
-        m_data.index(p) = m_data.index(p-1);
-        m_data.value(p) = m_data.value(p-1);
-        --p;
-      }
-      eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exist, you must call coeffRef to this end");
-
-      m_innerNonZeros[outer]++;
-
-      m_data.index(p) = inner;
-      return (m_data.value(p) = 0);
-    }
+    EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col);

   public:
     /** \internal
@ -1205,6 +1017,204 @@ void SparseMatrix<Scalar,_Options,_Index>::sumupDuplicates()
   m_data.resize(m_outerIndex[m_outerSize]);
 }

+template<typename Scalar, int _Options, typename _Index>
+template<typename OtherDerived>
+EIGEN_DONT_INLINE SparseMatrix<Scalar,_Options,_Index>& SparseMatrix<Scalar,_Options,_Index>::operator=(const SparseMatrixBase<OtherDerived>& other)
+{
+  const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+  if (needToTranspose)
+  {
+    // two passes algorithm:
+    // 1 - compute the number of coeffs per dest inner vector
+    // 2 - do the actual copy/eval
+    // Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed
+    typedef typename internal::nested<OtherDerived,2>::type OtherCopy;
+    typedef typename internal::remove_all<OtherCopy>::type _OtherCopy;
+    OtherCopy otherCopy(other.derived());
+
+    SparseMatrix dest(other.rows(),other.cols());
+    Eigen::Map<Matrix<Index, Dynamic, 1> > (dest.m_outerIndex,dest.outerSize()).setZero();
+
+    // pass 1
+    // FIXME the above copy could be merged with that pass
+    for (Index j=0; j<otherCopy.outerSize(); ++j)
+      for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
+        ++dest.m_outerIndex[it.index()];
+
+    // prefix sum
+    Index count = 0;
+    VectorXi positions(dest.outerSize());
+    for (Index j=0; j<dest.outerSize(); ++j)
+    {
+      Index tmp = dest.m_outerIndex[j];
+      dest.m_outerIndex[j] = count;
+      positions[j] = count;
+      count += tmp;
+    }
+    dest.m_outerIndex[dest.outerSize()] = count;
+    // alloc
+    dest.m_data.resize(count);
+    // pass 2
+    for (Index j=0; j<otherCopy.outerSize(); ++j)
+    {
+      for (typename _OtherCopy::InnerIterator it(otherCopy, j); it; ++it)
+      {
+        Index pos = positions[it.index()]++;
+        dest.m_data.index(pos) = j;
+        dest.m_data.value(pos) = it.value();
+      }
+    }
+    this->swap(dest);
+    return *this;
+  }
+  else
+  {
+    if(other.isRValue())
+      initAssignment(other.derived());
+    // there is no special optimization
+    return Base::operator=(other.derived());
+  }
+}
+
+template<typename _Scalar, int _Options, typename _Index>
+EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertUncompressed(Index row, Index col)
+{
+  eigen_assert(!isCompressed());
+
+  const Index outer = IsRowMajor ? row : col;
+  const Index inner = IsRowMajor ? col : row;
+
+  std::ptrdiff_t room = m_outerIndex[outer+1] - m_outerIndex[outer];
+  std::ptrdiff_t innerNNZ = m_innerNonZeros[outer];
+  if(innerNNZ>=room)
+  {
+    // this inner vector is full, we need to reallocate the whole buffer :(
+    reserve(SingletonVector(outer,std::max<std::ptrdiff_t>(2,innerNNZ)));
+  }
+
+  Index startId = m_outerIndex[outer];
+  Index p = startId + m_innerNonZeros[outer];
+  while ( (p > startId) && (m_data.index(p-1) > inner) )
+  {
+    m_data.index(p) = m_data.index(p-1);
+    m_data.value(p) = m_data.value(p-1);
+    --p;
+  }
+  eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exist, you must call coeffRef to this end");
+
+  m_innerNonZeros[outer]++;
+
+  m_data.index(p) = inner;
+  return (m_data.value(p) = 0);
+}
+
+template<typename _Scalar, int _Options, typename _Index>
+EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertCompressed(Index row, Index col)
+{
+  eigen_assert(isCompressed());
+
+  const Index outer = IsRowMajor ? row : col;
+  const Index inner = IsRowMajor ? col : row;
+
+  Index previousOuter = outer;
+  if (m_outerIndex[outer+1]==0)
+  {
+    // we start a new inner vector
+    while (previousOuter>=0 && m_outerIndex[previousOuter]==0)
+    {
+      m_outerIndex[previousOuter] = static_cast<Index>(m_data.size());
+      --previousOuter;
+    }
+    m_outerIndex[outer+1] = m_outerIndex[outer];
+  }
+
+  // here we have to handle the tricky case where the outerIndex array
+  // starts with: [ 0 0 0 0 0 1 ...] and we are inserted in, e.g.,
+  // the 2nd inner vector...
+  bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0))
+                && (size_t(m_outerIndex[outer+1]) == m_data.size());
+
+  size_t startId = m_outerIndex[outer];
+  // FIXME let's make sure sizeof(long int) == sizeof(size_t)
+  size_t p = m_outerIndex[outer+1];
+  ++m_outerIndex[outer+1];
+
+  float reallocRatio = 1;
+  if (m_data.allocatedSize()<=m_data.size())
+  {
+    // if there is no preallocated memory, let's reserve a minimum of 32 elements
+    if (m_data.size()==0)
+    {
+      m_data.reserve(32);
+    }
+    else
+    {
+      // we need to reallocate the data, to reduce multiple reallocations
+      // we use a smart resize algorithm based on the current filling ratio
+      // in addition, we use float to avoid integers overflows
+      float nnzEstimate = float(m_outerIndex[outer])*float(m_outerSize)/float(outer+1);
+      reallocRatio = (nnzEstimate-float(m_data.size()))/float(m_data.size());
+      // furthermore we bound the realloc ratio to:
+      // 1) reduce multiple minor realloc when the matrix is almost filled
+      // 2) avoid to allocate too much memory when the matrix is almost empty
+      reallocRatio = (std::min)((std::max)(reallocRatio,1.5f),8.f);
+    }
+  }
+  m_data.resize(m_data.size()+1,reallocRatio);
+
+  if (!isLastVec)
+  {
+    if (previousOuter==-1)
+    {
+      // oops wrong guess.
+      // let's correct the outer offsets
+      for (Index k=0; k<=(outer+1); ++k)
+        m_outerIndex[k] = 0;
+      Index k=outer+1;
+      while(m_outerIndex[k]==0)
+        m_outerIndex[k++] = 1;
+      while (k<=m_outerSize && m_outerIndex[k]!=0)
+        m_outerIndex[k++]++;
+      p = 0;
+      --k;
+      k = m_outerIndex[k]-1;
+      while (k>0)
+      {
+        m_data.index(k) = m_data.index(k-1);
+        m_data.value(k) = m_data.value(k-1);
+        k--;
+      }
+    }
+    else
+    {
+      // we are not inserting into the last inner vec
+      // update outer indices:
+      Index j = outer+2;
+      while (j<=m_outerSize && m_outerIndex[j]!=0)
+        m_outerIndex[j++]++;
+      --j;
+      // shift data of last vecs:
+      Index k = m_outerIndex[j]-1;
+      while (k>=Index(p))
+      {
+        m_data.index(k) = m_data.index(k-1);
+        m_data.value(k) = m_data.value(k-1);
+        k--;
+      }
+    }
+  }
+
+  while ( (p > startId) && (m_data.index(p-1) > inner) )
+  {
+    m_data.index(p) = m_data.index(p-1);
+    m_data.value(p) = m_data.value(p-1);
+    --p;
+  }
+
+  m_data.index(p) = inner;
+  return (m_data.value(p) = 0);
+}
+
 } // end namespace Eigen

 #endif // EIGEN_SPARSEMATRIX_H
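The largest block moved out of SparseMatrix above is the storage-order-converting assignment. Its comments describe a two-pass algorithm: first count the coefficients that land in each destination inner vector, then turn those counts into offsets with a prefix sum and scatter the values. A simplified, self-contained sketch of that scheme, using std::vector and made-up Entry/Compressed types rather than Eigen's internal storage:

    #include <vector>

    // One stored coefficient: its inner index and its value.
    struct Entry { int inner; double value; };

    // Minimal compressed sparse storage: outerIndex[k]..outerIndex[k+1] delimit
    // the entries of outer vector k inside 'data'.
    struct Compressed {
      std::vector<int> outerIndex;   // size outerSize + 1
      std::vector<Entry> data;
    };

    // Re-bucket 'src' so entries are grouped by their inner index instead of
    // by their outer vector -- the two-pass scheme operator= uses when the
    // source and destination storage orders differ.
    Compressed regroupByInner(const Compressed& src, int dstOuterSize)
    {
      Compressed dst;
      dst.outerIndex.assign(dstOuterSize + 1, 0);

      // pass 1: count how many entries fall into each destination outer vector
      for (const Entry& e : src.data)
        ++dst.outerIndex[e.inner];

      // prefix sum: convert per-vector counts into start offsets
      std::vector<int> positions(dstOuterSize);
      int count = 0;
      for (int j = 0; j < dstOuterSize; ++j) {
        int tmp = dst.outerIndex[j];
        dst.outerIndex[j] = count;
        positions[j] = count;
        count += tmp;
      }
      dst.outerIndex[dstOuterSize] = count;
      dst.data.resize(count);

      // pass 2: scatter each entry to its slot; its new inner index is the
      // outer vector it came from
      const int srcOuterSize = static_cast<int>(src.outerIndex.size()) - 1;
      for (int j = 0; j < srcOuterSize; ++j)
        for (int p = src.outerIndex[j]; p < src.outerIndex[j + 1]; ++p) {
          const Entry& e = src.data[p];
          int pos = positions[e.inner]++;
          dst.data[pos] = Entry{ j, e.value };
        }
      return dst;
    }

The real operator= additionally evaluates the right-hand side at most twice via internal::nested and swaps the result in, but the counting / prefix-sum / scatter structure is the same.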
@ -309,30 +309,7 @@ class SparseVector

   protected:
     template<typename OtherDerived>
-    EIGEN_DONT_INLINE SparseVector& assign(const SparseMatrixBase<OtherDerived>& _other)
-    {
-      const OtherDerived& other(_other.derived());
-      const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
-      if(needToTranspose)
-      {
-        Index size = other.size();
-        Index nnz = other.nonZeros();
-        resize(size);
-        reserve(nnz);
-        for(Index i=0; i<size; ++i)
-        {
-          typename OtherDerived::InnerIterator it(other, i);
-          if(it)
-            insert(i) = it.value();
-        }
-        return *this;
-      }
-      else
-      {
-        // there is no special optimization
-        return Base::operator=(other);
-      }
-    }
+    EIGEN_DONT_INLINE SparseVector& assign(const SparseMatrixBase<OtherDerived>& _other);

     Storage m_data;
     Index m_size;
@ -402,6 +379,33 @@ class SparseVector<Scalar,_Options,_Index>::ReverseInnerIterator
     const Index m_start;
 };

+template<typename Scalar, int _Options, typename _Index>
+template<typename OtherDerived>
+EIGEN_DONT_INLINE SparseVector<Scalar,_Options,_Index>& SparseVector<Scalar,_Options,_Index>::assign(const SparseMatrixBase<OtherDerived>& _other)
+{
+  const OtherDerived& other(_other.derived());
+  const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
+  if(needToTranspose)
+  {
+    Index size = other.size();
+    Index nnz = other.nonZeros();
+    resize(size);
+    reserve(nnz);
+    for(Index i=0; i<size; ++i)
+    {
+      typename OtherDerived::InnerIterator it(other, i);
+      if(it)
+        insert(i) = it.value();
+    }
+    return *this;
+  }
+  else
+  {
+    // there is no special optimization
+    return Base::operator=(other);
+  }
+}
+
 } // end namespace Eigen

 #endif // EIGEN_SPARSEVECTOR_H