mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-23 01:59:38 +08:00
split sum away from redux and vectorize it.
(could come back to redux after it has been vectorized, and could serve as a starting point for that) also make the abs2 functor vectorizable (for real types).
This commit is contained in:
parent
8a967fb17c
commit
dc9206cec5
@ -48,6 +48,7 @@ namespace Eigen {
|
||||
#include "src/Core/Dot.h"
|
||||
#include "src/Core/DiagonalMatrix.h"
|
||||
#include "src/Core/DiagonalCoeffs.h"
|
||||
#include "src/Core/Sum.h"
|
||||
#include "src/Core/Redux.h"
|
||||
#include "src/Core/Visitor.h"
|
||||
#include "src/Core/Fuzzy.h"
|
||||
|
@ -31,19 +31,6 @@
|
||||
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
||||
***************************************************************************/
|
||||
|
||||
// Identifiers for the available vectorization strategies.
// The numeric values are the implicit ones, spelled out for clarity.
enum {
  NoVectorization     = 0,
  InnerVectorization  = 1,
  LinearVectorization = 2,
  SliceVectorization  = 3
};
|
||||
|
||||
// Identifiers for the available loop-unrolling strategies.
// The numeric values are the implicit ones, spelled out for clarity.
enum {
  CompleteUnrolling = 0,
  InnerUnrolling    = 1,
  NoUnrolling       = 2
};
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
struct ei_assign_traits
|
||||
{
|
||||
|
@ -173,10 +173,13 @@ struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
|
||||
template<typename Scalar> struct ei_scalar_abs2_op EIGEN_EMPTY_STRUCT {
|
||||
typedef typename NumTraits<Scalar>::Real result_type;
|
||||
inline const result_type operator() (const Scalar& a) const { return ei_abs2(a); }
|
||||
template<typename PacketScalar>
|
||||
inline const PacketScalar packetOp(const PacketScalar& a) const
|
||||
{ return ei_pmul(a,a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct ei_functor_traits<ei_scalar_abs2_op<Scalar> >
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = false }; };
|
||||
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = NumTraits<Scalar>::IsComplex==false && int(ei_packet_traits<Scalar>::size)>1 }; };
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate of a complex value
|
||||
@ -223,7 +226,7 @@ struct ei_functor_traits<ei_scalar_real_op<Scalar> >
|
||||
*
|
||||
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
|
||||
*/
|
||||
template<typename Scalar, bool PacketAccess = (int(ei_packet_traits<Scalar>::size)>1?true:false) > struct ei_scalar_multiple_op;
|
||||
template<typename Scalar, bool PacketAccess = (int(ei_packet_traits<Scalar>::size)>1) > struct ei_scalar_multiple_op;
|
||||
|
||||
template<typename Scalar>
|
||||
struct ei_scalar_multiple_op<Scalar,true> {
|
||||
|
@ -96,30 +96,6 @@ MatrixBase<Derived>::redux(const BinaryOp& func) const
|
||||
::run(derived(), func);
|
||||
}
|
||||
|
||||
/** \returns the sum of all coefficients of *this
|
||||
*
|
||||
* \sa trace()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename ei_traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::sum() const
|
||||
{
|
||||
return this->redux(Eigen::ei_scalar_sum_op<Scalar>());
|
||||
}
|
||||
|
||||
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
|
||||
*
|
||||
* \c *this can be any matrix, not necessarily square.
|
||||
*
|
||||
* \sa diagonal(), sum()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename ei_traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::trace() const
|
||||
{
|
||||
return diagonal().sum();
|
||||
}
|
||||
|
||||
/** \returns the minimum of all coefficients of *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
|
282
Eigen/src/Core/Sum.h
Normal file
282
Eigen/src/Core/Sum.h
Normal file
@ -0,0 +1,282 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra. Eigen itself is part of the KDE project.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
// Copyright (C) 2008 Benoit Jacob <jacob@math.jussieu.fr>
|
||||
//
|
||||
// Eigen is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
// License as published by the Free Software Foundation; either
|
||||
// version 3 of the License, or (at your option) any later version.
|
||||
//
|
||||
// Alternatively, you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation; either version 2 of
|
||||
// the License, or (at your option) any later version.
|
||||
//
|
||||
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License and a copy of the GNU General Public License along with
|
||||
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#ifndef EIGEN_SUM_H
|
||||
#define EIGEN_SUM_H
|
||||
|
||||
/***************************************************************************
|
||||
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Derived>
|
||||
struct ei_sum_traits
|
||||
{
|
||||
public:
|
||||
enum {
|
||||
Vectorization = (int(Derived::Flags)&PacketAccessBit)
|
||||
&& (int(Derived::Flags)&LinearAccessBit)
|
||||
? LinearVectorization
|
||||
: NoVectorization
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
|
||||
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
|
||||
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Unrolling = Cost <= UnrollingLimit
|
||||
? CompleteUnrolling
|
||||
: NoUnrolling
|
||||
};
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 2 : unrollers
|
||||
***************************************************************************/
|
||||
|
||||
/*** no vectorization ***/
|
||||
|
||||
// Fully unrolled scalar sum of the Length coefficients whose linear index
// starts at Start. The range is split in two halves recursively until a
// single coefficient remains.
template<typename Derived, int Start, int Length>
struct ei_sum_novec_unroller
{
  typedef typename Derived::Scalar Scalar;

  enum {
    HalfLength = Length/2
  };

  typedef ei_sum_novec_unroller<Derived, Start, HalfLength> FirstHalf;
  typedef ei_sum_novec_unroller<Derived, Start+HalfLength, Length-HalfLength> SecondHalf;

  inline static Scalar run(const Derived &xpr)
  {
    return FirstHalf::run(xpr) + SecondHalf::run(xpr);
  }
};

// End of the recursion: a single coefficient. The linear index Start is
// decomposed using the compile-time row count (column after column).
template<typename Derived, int Start>
struct ei_sum_novec_unroller<Derived, Start, 1>
{
  typedef typename Derived::Scalar Scalar;

  enum {
    col = Start / int(Derived::RowsAtCompileTime),
    row = Start % int(Derived::RowsAtCompileTime)
  };

  inline static Scalar run(const Derived &xpr)
  {
    return xpr.coeff(row, col);
  }
};
|
||||
|
||||
/*** vectorization ***/
|
||||
|
||||
template<typename Derived, int Index, int Stop,
|
||||
bool LastPacket = (Stop-Index == ei_packet_traits<typename Derived::Scalar>::size)>
|
||||
struct ei_sum_vec_unroller
|
||||
{
|
||||
enum {
|
||||
row = int(Derived::Flags)&RowMajorBit
|
||||
? Index / int(Derived::ColsAtCompileTime)
|
||||
: Index % Derived::RowsAtCompileTime,
|
||||
col = int(Derived::Flags)&RowMajorBit
|
||||
? Index % int(Derived::ColsAtCompileTime)
|
||||
: Index / Derived::RowsAtCompileTime
|
||||
};
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
inline static PacketScalar run(const Derived &mat)
|
||||
{
|
||||
return ei_padd(
|
||||
mat.template packet<Aligned>(row, col),
|
||||
ei_sum_vec_unroller<Derived, Index+ei_packet_traits<typename Derived::Scalar>::size, Stop>::run(mat)
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived, int Index, int Stop>
|
||||
struct ei_sum_vec_unroller<Derived, Index, Stop, true>
|
||||
{
|
||||
enum {
|
||||
row = int(Derived::Flags)&RowMajorBit
|
||||
? Index / int(Derived::ColsAtCompileTime)
|
||||
: Index % Derived::RowsAtCompileTime,
|
||||
col = int(Derived::Flags)&RowMajorBit
|
||||
? Index % int(Derived::ColsAtCompileTime)
|
||||
: Index / Derived::RowsAtCompileTime
|
||||
};
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
inline static PacketScalar run(const Derived &mat)
|
||||
{
|
||||
return mat.template packet<Aligned>(row, col);
|
||||
}
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 3 : implementation of all cases
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Derived,
|
||||
int Vectorization = ei_sum_traits<Derived>::Vectorization,
|
||||
int Unrolling = ei_sum_traits<Derived>::Unrolling
|
||||
>
|
||||
struct ei_sum_impl;
|
||||
|
||||
template<typename Derived>
|
||||
struct ei_sum_impl<Derived, NoVectorization, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
static Scalar run(const Derived& mat)
|
||||
{
|
||||
Scalar res;
|
||||
res = mat.coeff(0, 0);
|
||||
for(int i = 1; i < mat.rows(); i++)
|
||||
res += mat.coeff(i, 0);
|
||||
for(int j = 1; j < mat.cols(); j++)
|
||||
for(int i = 0; i < mat.rows(); i++)
|
||||
res += mat.coeff(i, j);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling>
|
||||
: public ei_sum_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
|
||||
{};
|
||||
|
||||
template<typename Derived>
|
||||
struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
static Scalar run(const Derived& mat)
|
||||
{
|
||||
const int size = mat.size();
|
||||
const int packetSize = ei_packet_traits<typename Derived::Scalar>::size;
|
||||
const int alignedSize = (size/packetSize)*packetSize;
|
||||
const bool rowMajor = Derived::Flags&RowMajorBit;
|
||||
const int innerSize = rowMajor ? mat.cols() : mat.rows();
|
||||
const int outerSize = rowMajor ? mat.rows() : mat.cols();
|
||||
Scalar res;
|
||||
|
||||
// do the vectorizable part of the sum
|
||||
if(size >= packetSize)
|
||||
{
|
||||
PacketScalar packet_res;
|
||||
packet_res = mat.template packet<Aligned>(0, 0);
|
||||
int index;
|
||||
for(index = packetSize; index<alignedSize ; index+=packetSize)
|
||||
{
|
||||
// FIXME the following is not really efficient
|
||||
const int row = rowMajor ? index/innerSize : index%innerSize;
|
||||
const int col = rowMajor ? index%innerSize : index/innerSize;
|
||||
packet_res = ei_padd(packet_res, mat.template packet<Aligned>(row, col));
|
||||
}
|
||||
res = ei_predux(packet_res);
|
||||
|
||||
// now we must do the rest without vectorization.
|
||||
if(alignedSize == size) return res;
|
||||
}
|
||||
else // too small to vectorize anything.
|
||||
// since this is dynamic-size hence inefficient anyway, don't try to optimize.
|
||||
{
|
||||
res = Scalar(0);
|
||||
}
|
||||
|
||||
const int k = alignedSize/innerSize;
|
||||
|
||||
// do the remainder of the current row or col
|
||||
for(int i = alignedSize%innerSize; i < innerSize; i++)
|
||||
{
|
||||
const int row = rowMajor ? k : i;
|
||||
const int col = rowMajor ? i : k;
|
||||
res += mat.coeff(row, col);
|
||||
}
|
||||
|
||||
// do the remaining rows or cols
|
||||
for(int j = k+1; j < outerSize; j++)
|
||||
for(int i = 0; i < innerSize; i++)
|
||||
{
|
||||
const int row = rowMajor ? i : j;
|
||||
const int col = rowMajor ? j : i;
|
||||
res += mat.coeff(row, col);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
static Scalar run(const Derived& mat)
|
||||
{
|
||||
return ei_predux(
|
||||
ei_sum_vec_unroller<Derived, 0, Derived::SizeAtCompileTime>::run(mat)
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 4 : implementation of MatrixBase methods
|
||||
***************************************************************************/
|
||||
|
||||
/** \returns the sum of all coefficients of *this
|
||||
*
|
||||
* \sa trace()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename ei_traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::sum() const
|
||||
{
|
||||
return ei_sum_impl<Derived>::run(derived());
|
||||
}
|
||||
|
||||
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
|
||||
*
|
||||
* \c *this can be any matrix, not necessarily square.
|
||||
*
|
||||
* \sa diagonal(), sum()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename ei_traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::trace() const
|
||||
{
|
||||
return diagonal().sum();
|
||||
}
|
||||
|
||||
|
||||
#endif // EIGEN_SUM_H
|
@ -142,5 +142,18 @@ enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
|
||||
enum DirectionType { Vertical, Horizontal };
|
||||
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct };
|
||||
|
||||
// Identifiers for the available vectorization strategies.
// The numeric values are the implicit ones, spelled out for clarity.
enum {
  NoVectorization     = 0,
  InnerVectorization  = 1,
  LinearVectorization = 2,
  SliceVectorization  = 3
};
|
||||
|
||||
// Identifiers for the available loop-unrolling strategies.
// The numeric values are the implicit ones, spelled out for clarity.
enum {
  CompleteUnrolling = 0,
  InnerUnrolling    = 1,
  NoUnrolling       = 2
};
|
||||
|
||||
|
||||
#endif // EIGEN_CONSTANTS_H
|
||||
|
Loading…
x
Reference in New Issue
Block a user