mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-16 10:01:49 +08:00
split sum away from redux and vectorize it.
(could come back to redux after it has been vectorized, and could serve as a starting point for that) also make the abs2 functor vectorizable (for real types).
This commit is contained in:
parent
8a967fb17c
commit
dc9206cec5
@ -48,6 +48,7 @@ namespace Eigen {
|
|||||||
#include "src/Core/Dot.h"
|
#include "src/Core/Dot.h"
|
||||||
#include "src/Core/DiagonalMatrix.h"
|
#include "src/Core/DiagonalMatrix.h"
|
||||||
#include "src/Core/DiagonalCoeffs.h"
|
#include "src/Core/DiagonalCoeffs.h"
|
||||||
|
#include "src/Core/Sum.h"
|
||||||
#include "src/Core/Redux.h"
|
#include "src/Core/Redux.h"
|
||||||
#include "src/Core/Visitor.h"
|
#include "src/Core/Visitor.h"
|
||||||
#include "src/Core/Fuzzy.h"
|
#include "src/Core/Fuzzy.h"
|
||||||
|
@ -31,19 +31,6 @@
|
|||||||
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
enum {
|
|
||||||
NoVectorization,
|
|
||||||
InnerVectorization,
|
|
||||||
LinearVectorization,
|
|
||||||
SliceVectorization
|
|
||||||
};
|
|
||||||
|
|
||||||
enum {
|
|
||||||
CompleteUnrolling,
|
|
||||||
InnerUnrolling,
|
|
||||||
NoUnrolling
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Derived, typename OtherDerived>
|
template <typename Derived, typename OtherDerived>
|
||||||
struct ei_assign_traits
|
struct ei_assign_traits
|
||||||
{
|
{
|
||||||
|
@ -173,10 +173,13 @@ struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
|
|||||||
template<typename Scalar> struct ei_scalar_abs2_op EIGEN_EMPTY_STRUCT {
|
template<typename Scalar> struct ei_scalar_abs2_op EIGEN_EMPTY_STRUCT {
|
||||||
typedef typename NumTraits<Scalar>::Real result_type;
|
typedef typename NumTraits<Scalar>::Real result_type;
|
||||||
inline const result_type operator() (const Scalar& a) const { return ei_abs2(a); }
|
inline const result_type operator() (const Scalar& a) const { return ei_abs2(a); }
|
||||||
|
template<typename PacketScalar>
|
||||||
|
inline const PacketScalar packetOp(const PacketScalar& a) const
|
||||||
|
{ return ei_pmul(a,a); }
|
||||||
};
|
};
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
|
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
|
||||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = NumTraits<Scalar>::IsComplex==false && int(ei_packet_traits<Scalar>::size)>1 }; };
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
* \brief Template functor to compute the conjugate of a complex value
|
* \brief Template functor to compute the conjugate of a complex value
|
||||||
@ -223,7 +226,7 @@ struct ei_functor_traits<ei_scalar_real_op<Scalar> >
|
|||||||
*
|
*
|
||||||
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
|
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
|
||||||
*/
|
*/
|
||||||
template<typename Scalar, bool PacketAccess = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_multiple_op;
|
template<typename Scalar, bool PacketAccess = (int(ei_packet_traits<Scalar>::size)>1) > struct ei_scalar_multiple_op;
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
struct ei_scalar_multiple_op<Scalar,true> {
|
struct ei_scalar_multiple_op<Scalar,true> {
|
||||||
|
@ -96,30 +96,6 @@ MatrixBase<Derived>::redux(const BinaryOp& func) const
|
|||||||
::run(derived(), func);
|
::run(derived(), func);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the sum of all coefficients of *this
|
|
||||||
*
|
|
||||||
* \sa trace()
|
|
||||||
*/
|
|
||||||
template<typename Derived>
|
|
||||||
inline typename ei_traits<Derived>::Scalar
|
|
||||||
MatrixBase<Derived>::sum() const
|
|
||||||
{
|
|
||||||
return this->redux(Eigen::ei_scalar_sum_op<Scalar>());
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
|
|
||||||
*
|
|
||||||
* \c *this can be any matrix, not necessarily square.
|
|
||||||
*
|
|
||||||
* \sa diagonal(), sum()
|
|
||||||
*/
|
|
||||||
template<typename Derived>
|
|
||||||
inline typename ei_traits<Derived>::Scalar
|
|
||||||
MatrixBase<Derived>::trace() const
|
|
||||||
{
|
|
||||||
return diagonal().sum();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \returns the minimum of all coefficients of *this
|
/** \returns the minimum of all coefficients of *this
|
||||||
*/
|
*/
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
|
282
Eigen/src/Core/Sum.h
Normal file
282
Eigen/src/Core/Sum.h
Normal file
@ -0,0 +1,282 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra. Eigen itself is part of the KDE project.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
|
// Copyright (C) 2008 Benoit Jacob <jacob@math.jussieu.fr>
|
||||||
|
//
|
||||||
|
// Eigen is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU Lesser General Public
|
||||||
|
// License as published by the Free Software Foundation; either
|
||||||
|
// version 3 of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Alternatively, you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License as
|
||||||
|
// published by the Free Software Foundation; either version 2 of
|
||||||
|
// the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public
|
||||||
|
// License and a copy of the GNU General Public License along with
|
||||||
|
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#ifndef EIGEN_SUM_H
|
||||||
|
#define EIGEN_SUM_H
|
||||||
|
|
||||||
|
/***************************************************************************
|
||||||
|
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
template<typename Derived>
|
||||||
|
struct ei_sum_traits
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
Vectorization = (int(Derived::Flags)&PacketAccessBit)
|
||||||
|
&& (int(Derived::Flags)&LinearAccessBit)
|
||||||
|
? LinearVectorization
|
||||||
|
: NoVectorization
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
enum {
|
||||||
|
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
|
||||||
|
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
|
||||||
|
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
|
||||||
|
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
Unrolling = Cost <= UnrollingLimit
|
||||||
|
? CompleteUnrolling
|
||||||
|
: NoUnrolling
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/***************************************************************************
|
||||||
|
* Part 2 : unrollers
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/*** no vectorization ***/
|
||||||
|
|
||||||
|
/** \internal
  * Compile-time unrolled, non-vectorized sum of \a Length coefficients of
  * \a mat, starting at linear index \a Start.
  *
  * The range is split into two halves that are summed recursively and then
  * added together; the recursion ends in the Length==1 specialization below.
  */
template<typename Derived, int Start, int Length>
struct ei_sum_novec_unroller
{
  typedef typename Derived::Scalar Scalar;

  enum {
    HalfLength = Length/2
  };

  inline static Scalar run(const Derived &mat)
  {
    typedef ei_sum_novec_unroller<Derived, Start, HalfLength> FirstPart;
    typedef ei_sum_novec_unroller<Derived, Start+HalfLength, Length-HalfLength> SecondPart;
    return FirstPart::run(mat) + SecondPart::run(mat);
  }
};

/** \internal
  * End of the recursion: a range of a single coefficient.
  *
  * The linear index \a Start is converted to (row, col) by walking the
  * coefficients column after column, using Derived::RowsAtCompileTime.
  */
template<typename Derived, int Start>
struct ei_sum_novec_unroller<Derived, Start, 1>
{
  typedef typename Derived::Scalar Scalar;

  enum {
    row = Start % Derived::RowsAtCompileTime,
    col = Start / Derived::RowsAtCompileTime
  };

  inline static Scalar run(const Derived &mat)
  {
    return mat.coeff(row, col);
  }
};
|
||||||
|
|
||||||
|
/*** vectorization ***/
|
||||||
|
|
||||||
|
template<typename Derived, int Index, int Stop,
|
||||||
|
bool LastPacket = (Stop-Index == ei_packet_traits<typename Derived::Scalar>::size)>
|
||||||
|
struct ei_sum_vec_unroller
|
||||||
|
{
|
||||||
|
enum {
|
||||||
|
row = int(Derived::Flags)&RowMajorBit
|
||||||
|
? Index / int(Derived::ColsAtCompileTime)
|
||||||
|
: Index % Derived::RowsAtCompileTime,
|
||||||
|
col = int(Derived::Flags)&RowMajorBit
|
||||||
|
? Index % int(Derived::ColsAtCompileTime)
|
||||||
|
: Index / Derived::RowsAtCompileTime
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||||
|
|
||||||
|
inline static PacketScalar run(const Derived &mat)
|
||||||
|
{
|
||||||
|
return ei_padd(
|
||||||
|
mat.template packet<Aligned>(row, col),
|
||||||
|
ei_sum_vec_unroller<Derived, Index+ei_packet_traits<typename Derived::Scalar>::size, Stop>::run(mat)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Derived, int Index, int Stop>
|
||||||
|
struct ei_sum_vec_unroller<Derived, Index, Stop, true>
|
||||||
|
{
|
||||||
|
enum {
|
||||||
|
row = int(Derived::Flags)&RowMajorBit
|
||||||
|
? Index / int(Derived::ColsAtCompileTime)
|
||||||
|
: Index % Derived::RowsAtCompileTime,
|
||||||
|
col = int(Derived::Flags)&RowMajorBit
|
||||||
|
? Index % int(Derived::ColsAtCompileTime)
|
||||||
|
: Index / Derived::RowsAtCompileTime
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||||
|
|
||||||
|
inline static PacketScalar run(const Derived &mat)
|
||||||
|
{
|
||||||
|
return mat.template packet<Aligned>(row, col);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/***************************************************************************
|
||||||
|
* Part 3 : implementation of all cases
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
template<typename Derived,
|
||||||
|
int Vectorization = ei_sum_traits<Derived>::Vectorization,
|
||||||
|
int Unrolling = ei_sum_traits<Derived>::Unrolling
|
||||||
|
>
|
||||||
|
struct ei_sum_impl;
|
||||||
|
|
||||||
|
template<typename Derived>
|
||||||
|
struct ei_sum_impl<Derived, NoVectorization, NoUnrolling>
|
||||||
|
{
|
||||||
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
static Scalar run(const Derived& mat)
|
||||||
|
{
|
||||||
|
Scalar res;
|
||||||
|
res = mat.coeff(0, 0);
|
||||||
|
for(int i = 1; i < mat.rows(); i++)
|
||||||
|
res += mat.coeff(i, 0);
|
||||||
|
for(int j = 1; j < mat.cols(); j++)
|
||||||
|
for(int i = 0; i < mat.rows(); i++)
|
||||||
|
res += mat.coeff(i, j);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Derived>
|
||||||
|
struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling>
|
||||||
|
: public ei_sum_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
|
||||||
|
{};
|
||||||
|
|
||||||
|
template<typename Derived>
|
||||||
|
struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
|
||||||
|
{
|
||||||
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||||
|
|
||||||
|
static Scalar run(const Derived& mat)
|
||||||
|
{
|
||||||
|
const int size = mat.size();
|
||||||
|
const int packetSize = ei_packet_traits<typename Derived::Scalar>::size;
|
||||||
|
const int alignedSize = (size/packetSize)*packetSize;
|
||||||
|
const bool rowMajor = Derived::Flags&RowMajorBit;
|
||||||
|
const int innerSize = rowMajor ? mat.cols() : mat.rows();
|
||||||
|
const int outerSize = rowMajor ? mat.rows() : mat.cols();
|
||||||
|
Scalar res;
|
||||||
|
|
||||||
|
// do the vectorizable part of the sum
|
||||||
|
if(size >= packetSize)
|
||||||
|
{
|
||||||
|
PacketScalar packet_res;
|
||||||
|
packet_res = mat.template packet<Aligned>(0, 0);
|
||||||
|
int index;
|
||||||
|
for(index = packetSize; index<alignedSize ; index+=packetSize)
|
||||||
|
{
|
||||||
|
// FIXME the following is not really efficient
|
||||||
|
const int row = rowMajor ? index/innerSize : index%innerSize;
|
||||||
|
const int col = rowMajor ? index%innerSize : index/innerSize;
|
||||||
|
packet_res = ei_padd(packet_res, mat.template packet<Aligned>(row, col));
|
||||||
|
}
|
||||||
|
res = ei_predux(packet_res);
|
||||||
|
|
||||||
|
// now we must do the rest without vectorization.
|
||||||
|
if(alignedSize == size) return res;
|
||||||
|
}
|
||||||
|
else // too small to vectorize anything.
|
||||||
|
// since this is dynamic-size hence inefficient anyway, don't try to optimize.
|
||||||
|
{
|
||||||
|
res = Scalar(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
const int k = alignedSize/innerSize;
|
||||||
|
|
||||||
|
// do the remainder of the current row or col
|
||||||
|
for(int i = alignedSize%innerSize; i < innerSize; i++)
|
||||||
|
{
|
||||||
|
const int row = rowMajor ? k : i;
|
||||||
|
const int col = rowMajor ? i : k;
|
||||||
|
res += mat.coeff(row, col);
|
||||||
|
}
|
||||||
|
|
||||||
|
// do the remaining rows or cols
|
||||||
|
for(int j = k+1; j < outerSize; j++)
|
||||||
|
for(int i = 0; i < innerSize; i++)
|
||||||
|
{
|
||||||
|
const int row = rowMajor ? i : j;
|
||||||
|
const int col = rowMajor ? j : i;
|
||||||
|
res += mat.coeff(row, col);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Derived>
|
||||||
|
struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
|
||||||
|
{
|
||||||
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
static Scalar run(const Derived& mat)
|
||||||
|
{
|
||||||
|
return ei_predux(
|
||||||
|
ei_sum_vec_unroller<Derived, 0, Derived::SizeAtCompileTime>::run(mat)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/***************************************************************************
|
||||||
|
* Part 4 : implementation of MatrixBase methods
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/** \returns the sum of all coefficients of *this
|
||||||
|
*
|
||||||
|
* \sa trace()
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
inline typename ei_traits<Derived>::Scalar
|
||||||
|
MatrixBase<Derived>::sum() const
|
||||||
|
{
|
||||||
|
return ei_sum_impl<Derived>::run(derived());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
|
||||||
|
*
|
||||||
|
* \c *this can be any matrix, not necessarily square.
|
||||||
|
*
|
||||||
|
* \sa diagonal(), sum()
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
inline typename ei_traits<Derived>::Scalar
|
||||||
|
MatrixBase<Derived>::trace() const
|
||||||
|
{
|
||||||
|
return diagonal().sum();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif // EIGEN_SUM_H
|
@ -142,5 +142,18 @@ enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
|
|||||||
enum DirectionType { Vertical, Horizontal };
|
enum DirectionType { Vertical, Horizontal };
|
||||||
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct };
|
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct };
|
||||||
|
|
||||||
|
// Identifiers of the vectorization strategies; traits structs such as
// ei_sum_traits expose one of them through their Vectorization member.
// NOTE(review): the exact meaning of each strategy is defined by the code
// consuming these values -- confirm there before relying on the names alone.
enum
{
  NoVectorization,
  InnerVectorization,
  LinearVectorization,
  SliceVectorization
};
|
||||||
|
|
||||||
|
// Identifiers of the loop unrolling strategies; traits structs such as
// ei_sum_traits expose one of them through their Unrolling member.
// NOTE(review): the exact meaning of each strategy is defined by the code
// consuming these values -- confirm there before relying on the names alone.
enum
{
  CompleteUnrolling,
  InnerUnrolling,
  NoUnrolling
};
|
||||||
|
|
||||||
|
|
||||||
#endif // EIGEN_CONSTANTS_H
|
#endif // EIGEN_CONSTANTS_H
|
||||||
|
Loading…
x
Reference in New Issue
Block a user