* exit Sum.h, exit Prod.h, welcome vectorization of redux()!

* add vectorization for minCoeff and maxCoeff
Gael Guennebaud 2009-02-12 15:18:59 +00:00
parent dc97d483fd
commit 51c991af45
13 changed files with 497 additions and 760 deletions

Eigen/Core

@@ -138,8 +138,6 @@ namespace Eigen {
#include "src/Core/Transpose.h"
#include "src/Core/DiagonalMatrix.h"
#include "src/Core/DiagonalCoeffs.h"
-#include "src/Core/Sum.h"
-#include "src/Core/Prod.h"
#include "src/Core/Redux.h"
#include "src/Core/Visitor.h"
#include "src/Core/Fuzzy.h"

Eigen/src/Core/Functors.h

@@ -37,6 +37,9 @@ template<typename Scalar> struct ei_scalar_sum_op EIGEN_EMPTY_STRUCT {
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
{ return ei_padd(a,b); }
+template<typename PacketScalar>
+EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
+{ return ei_predux(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_sum_op<Scalar> > {
@@ -56,6 +59,9 @@ template<typename Scalar> struct ei_scalar_product_op EIGEN_EMPTY_STRUCT {
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
{ return ei_pmul(a,b); }
+template<typename PacketScalar>
+EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
+{ return ei_predux_mul(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_product_op<Scalar> > {
@@ -75,6 +81,9 @@ template<typename Scalar> struct ei_scalar_min_op EIGEN_EMPTY_STRUCT {
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
{ return ei_pmin(a,b); }
+template<typename PacketScalar>
+EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
+{ return ei_predux_min(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_min_op<Scalar> > {
@@ -94,6 +103,9 @@ template<typename Scalar> struct ei_scalar_max_op EIGEN_EMPTY_STRUCT {
template<typename PacketScalar>
EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
{ return ei_pmax(a,b); }
+template<typename PacketScalar>
+EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
+{ return ei_predux_max(a); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_max_op<Scalar> > {
@@ -123,7 +135,7 @@ template<typename Scalar> struct ei_scalar_hypot_op EIGEN_EMPTY_STRUCT {
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_hypot_op<Scalar> > {
-enum { Cost = 5 * NumTraits<Scalar>::MulCost };
+enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
};
// other binary functors:
@@ -197,7 +209,7 @@ struct ei_functor_traits<ei_scalar_abs_op<Scalar> >
{
enum {
Cost = NumTraits<Scalar>::AddCost,
-PacketAccess = false // this could actually be vectorized with SSSE3.
+PacketAccess = false // FIXME this could actually be vectorized with SSSE3.
};
};
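The pattern added above, a lane-wise packetOp plus a predux that collapses the final packet into a scalar, advertised through the functor's PacketAccess trait, is exactly what the new redux() machinery keys on. A sketch of a hypothetical user-defined functor following the same convention (my_min_op and its trait are illustrative, not part of this commit; they simply mirror ei_scalar_min_op):

template<typename Scalar> struct my_min_op EIGEN_EMPTY_STRUCT {
  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const
  { return a < b ? a : b; }
  template<typename PacketScalar>
  EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const
  { return ei_pmin(a,b); }
  template<typename PacketScalar>
  EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const
  { return ei_predux_min(a); }
};
template<typename Scalar>
struct ei_functor_traits<my_min_op<Scalar> > {
  enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = true }; // PacketAccess opts in to vectorized redux
};

A functor without the predux member, or with PacketAccess disabled (like ei_scalar_hypot_op above), makes ei_redux_traits fall back to NoVectorization.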

Eigen/src/Core/GenericPacketMath.h

@@ -100,6 +100,14 @@ template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_pr
template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_mul(const Packet& a)
{ return a; }
+/** \internal \returns the min of the elements of \a a*/
+template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_min(const Packet& a)
+{ return a; }
+/** \internal \returns the max of the elements of \a a*/
+template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_max(const Packet& a)
+{ return a; }
/** \internal \returns the reversed elements of \a a*/
template<typename Packet> inline Packet ei_preverse(const Packet& a)
{ return a; }
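These generic overloads keep every reduction primitive well defined when a scalar type has no vector packet: the "packet" is then the scalar itself and each reduction is the identity, while SIMD backends specialize them (see the AltiVec and SSE hunks below). A standalone model of that default-plus-specialization pattern, all names hypothetical:

#include <cstdio>

// generic fallback: a one-element "packet" reduces to itself
template<typename Packet> inline Packet predux_min(const Packet& a) { return a; }

// a real packet type overloads it, e.g. a toy two-lane float packet
struct packet2f { float v[2]; };
inline float predux_min(const packet2f& a) { return a.v[0] < a.v[1] ? a.v[0] : a.v[1]; }

int main()
{
  packet2f p = { { 3.f, 1.f } };
  std::printf("%g %g\n", predux_min(2.5f), predux_min(p)); // prints: 2.5 1
  return 0;
}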

Eigen/src/Core/Prod.h (deleted)

@@ -1,262 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_PROD_H
#define EIGEN_PROD_H
/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/
template<typename Derived>
struct ei_prod_traits
{
private:
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
};
public:
enum {
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
&& (int(Derived::Flags)&LinearAccessBit)
? LinearVectorization
: NoVectorization
};
private:
enum {
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::MulCost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
};
public:
enum {
Unrolling = Cost <= UnrollingLimit
? CompleteUnrolling
: NoUnrolling
};
};
/***************************************************************************
* Part 2 : unrollers
***************************************************************************/
/*** no vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_prod_novec_unroller
{
enum {
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return ei_prod_novec_unroller<Derived, Start, HalfLength>::run(mat)
* ei_prod_novec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat);
}
};
template<typename Derived, int Start>
struct ei_prod_novec_unroller<Derived, Start, 1>
{
enum {
col = Start / Derived::RowsAtCompileTime,
row = Start % Derived::RowsAtCompileTime
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return mat.coeff(row, col);
}
};
/*** vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_prod_vec_unroller
{
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return ei_pmul(
ei_prod_vec_unroller<Derived, Start, HalfLength>::run(mat),
ei_prod_vec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat) );
}
};
template<typename Derived, int Start>
struct ei_prod_vec_unroller<Derived, Start, 1>
{
enum {
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
row = int(Derived::Flags)&RowMajorBit
? index / int(Derived::ColsAtCompileTime)
: index % Derived::RowsAtCompileTime,
col = int(Derived::Flags)&RowMajorBit
? index % int(Derived::ColsAtCompileTime)
: index / Derived::RowsAtCompileTime,
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return mat.template packet<alignment>(row, col);
}
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
template<typename Derived,
int Vectorization = ei_prod_traits<Derived>::Vectorization,
int Unrolling = ei_prod_traits<Derived>::Unrolling
>
struct ei_prod_impl;
template<typename Derived>
struct ei_prod_impl<Derived, NoVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived& mat)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
Scalar res;
res = mat.coeff(0, 0);
for(int i = 1; i < mat.rows(); ++i)
res *= mat.coeff(i, 0);
for(int j = 1; j < mat.cols(); ++j)
for(int i = 0; i < mat.rows(); ++i)
res *= mat.coeff(i, j);
return res;
}
};
template<typename Derived>
struct ei_prod_impl<Derived, NoVectorization, CompleteUnrolling>
: public ei_prod_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
{};
template<typename Derived>
struct ei_prod_impl<Derived, LinearVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
static Scalar run(const Derived& mat)
{
const int size = mat.size();
const int packetSize = ei_packet_traits<Scalar>::size;
const int alignedStart = (Derived::Flags & AlignedBit)
|| !(Derived::Flags & DirectAccessBit)
? 0
: ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
enum {
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const int alignedSize = ((size-alignedStart)/packetSize)*packetSize;
const int alignedEnd = alignedStart + alignedSize;
Scalar res;
if(alignedSize)
{
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
packet_res = ei_pmul(packet_res, mat.template packet<alignment>(index));
res = ei_predux_mul(packet_res);
}
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
res = Scalar(1);
}
for(int index = 0; index < alignedStart; ++index)
res *= mat.coeff(index);
for(int index = alignedEnd; index < size; ++index)
res *= mat.coeff(index);
return res;
}
};
template<typename Derived>
struct ei_prod_impl<Derived, LinearVectorization, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime,
VectorizationSize = (Size / PacketSize) * PacketSize
};
static Scalar run(const Derived& mat)
{
Scalar res = ei_predux_mul(ei_prod_vec_unroller<Derived, 0, Size / PacketSize>::run(mat));
if (VectorizationSize != Size)
res *= ei_prod_novec_unroller<Derived, VectorizationSize, Size-VectorizationSize>::run(mat);
return res;
}
};
/***************************************************************************
* Part 4 : implementation of MatrixBase methods
***************************************************************************/
/** \returns the product of all coefficients of *this
*
* Example: \include MatrixBase_prod.cpp
* Output: \verbinclude MatrixBase_prod.out
*
* \sa sum()
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::prod() const
{
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
return ei_prod_impl<ThisNested>::run(derived());
}
#endif // EIGEN_PROD_H

Eigen/src/Core/Redux.h

@@ -26,48 +26,147 @@
#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H
-template<typename BinaryOp, typename Derived, int Start, int Length>
-struct ei_redux_impl
+// TODO
+// * implement other kind of vectorization
+// * factorize code
+/***************************************************************************
+* Part 1 : the logic deciding a strategy for vectorization and unrolling
+***************************************************************************/
+template<typename Func, typename Derived>
+struct ei_redux_traits
+{
+private:
+enum {
+PacketSize = ei_packet_traits<typename Derived::Scalar>::size
+};
+public:
+enum {
+Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
+&& (int(Derived::Flags)&LinearAccessBit)
+&& (ei_functor_traits<Func>::PacketAccess)
+? LinearVectorization
+: NoVectorization
+};
+private:
+enum {
+Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
++ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
+UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
+};
+public:
+enum {
+Unrolling = Cost <= UnrollingLimit
+? CompleteUnrolling
+: NoUnrolling
+};
+};
+/***************************************************************************
+* Part 2 : unrollers
+***************************************************************************/
+/*** no vectorization ***/
+template<typename Func, typename Derived, int Start, int Length>
+struct ei_redux_novec_unroller
{
enum {
HalfLength = Length/2
};
-typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
+typedef typename Derived::Scalar Scalar;
-static Scalar run(const Derived &mat, const BinaryOp& func)
+EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func)
{
-return func(
-ei_redux_impl<BinaryOp, Derived, Start, HalfLength>::run(mat, func),
-ei_redux_impl<BinaryOp, Derived, Start+HalfLength, Length - HalfLength>::run(mat, func));
+return func(ei_redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+ei_redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
}
};
-template<typename BinaryOp, typename Derived, int Start>
-struct ei_redux_impl<BinaryOp, Derived, Start, 1>
+template<typename Func, typename Derived, int Start>
+struct ei_redux_novec_unroller<Func, Derived, Start, 1>
{
enum {
col = Start / Derived::RowsAtCompileTime,
row = Start % Derived::RowsAtCompileTime
};
-typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
+typedef typename Derived::Scalar Scalar;
-static Scalar run(const Derived &mat, const BinaryOp &)
+EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&)
{
return mat.coeff(row, col);
}
};
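The unroller above evaluates a fixed-size reduction entirely at compile time as a balanced call tree, halving Length at each level; for Length = 4 it flattens to func(func(c0,c1), func(c2,c3)). A standalone replica of that structure over a plain array (names hypothetical):

#include <cstdio>

template<typename F, int Start, int Length>
struct novec_unroller
{
  enum { HalfLength = Length/2 };
  static double run(const double* v, F f)
  {
    return f(novec_unroller<F, Start, HalfLength>::run(v, f),
             novec_unroller<F, Start+HalfLength, Length-HalfLength>::run(v, f));
  }
};
template<typename F, int Start>
struct novec_unroller<F, Start, 1>
{
  static double run(const double* v, F) { return v[Start]; }
};

struct add_op { double operator()(double a, double b) const { return a + b; } };

int main()
{
  const double v[4] = { 1, 2, 3, 4 };
  std::printf("%g\n", novec_unroller<add_op, 0, 4>::run(v, add_op())); // prints: 10
  return 0;
}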
-template<typename BinaryOp, typename Derived, int Start>
-struct ei_redux_impl<BinaryOp, Derived, Start, Dynamic>
+/*** vectorization ***/
+template<typename Func, typename Derived, int Start, int Length>
+struct ei_redux_vec_unroller
{
-typedef typename ei_result_of<BinaryOp(typename Derived::Scalar)>::type Scalar;
-static Scalar run(const Derived& mat, const BinaryOp& func)
+enum {
+PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
+HalfLength = Length/2
+};
+typedef typename Derived::Scalar Scalar;
+typedef typename ei_packet_traits<Scalar>::type PacketScalar;
+EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func)
+{
+return func.packetOp(
+ei_redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+ei_redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
+}
+};
+template<typename Func, typename Derived, int Start>
+struct ei_redux_vec_unroller<Func, Derived, Start, 1>
+{
+enum {
+index = Start * ei_packet_traits<typename Derived::Scalar>::size,
+row = int(Derived::Flags)&RowMajorBit
+? index / int(Derived::ColsAtCompileTime)
+: index % Derived::RowsAtCompileTime,
+col = int(Derived::Flags)&RowMajorBit
+? index % int(Derived::ColsAtCompileTime)
+: index / Derived::RowsAtCompileTime,
+alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
+};
+typedef typename Derived::Scalar Scalar;
+typedef typename ei_packet_traits<Scalar>::type PacketScalar;
+EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&)
+{
+return mat.template packet<alignment>(row, col);
+}
+};
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+template<typename Func, typename Derived,
+int Vectorization = ei_redux_traits<Func, Derived>::Vectorization,
+int Unrolling = ei_redux_traits<Func, Derived>::Unrolling
+>
+struct ei_redux_impl;
+template<typename Func, typename Derived>
+struct ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>
+{
+typedef typename Derived::Scalar Scalar;
+static Scalar run(const Derived& mat, const Func& func)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
Scalar res;
-res = mat.coeff(0,0);
+res = mat.coeff(0, 0);
for(int i = 1; i < mat.rows(); ++i)
res = func(res, mat.coeff(i, 0));
for(int j = 1; j < mat.cols(); ++j)
@@ -77,6 +176,77 @@ struct ei_redux_impl<BinaryOp, Derived, Start, Dynamic>
}
};
+template<typename Func, typename Derived>
+struct ei_redux_impl<Func,Derived, NoVectorization, CompleteUnrolling>
+: public ei_redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
+{};
+template<typename Func, typename Derived>
+struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling>
+{
+typedef typename Derived::Scalar Scalar;
+typedef typename ei_packet_traits<Scalar>::type PacketScalar;
+static Scalar run(const Derived& mat, const Func& func)
+{
+const int size = mat.size();
+const int packetSize = ei_packet_traits<Scalar>::size;
+const int alignedStart = (Derived::Flags & AlignedBit)
+|| !(Derived::Flags & DirectAccessBit)
+? 0
+: ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
+enum {
+alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
+? Aligned : Unaligned
+};
+const int alignedSize = ((size-alignedStart)/packetSize)*packetSize;
+const int alignedEnd = alignedStart + alignedSize;
+Scalar res;
+if(alignedSize)
+{
+PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
+for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
+packet_res = func.packetOp(packet_res, mat.template packet<alignment>(index));
+res = func.predux(packet_res);
+for(int index = 0; index < alignedStart; ++index)
+res = func(res,mat.coeff(index));
+for(int index = alignedEnd; index < size; ++index)
+res = func(res,mat.coeff(index));
+}
+else // too small to vectorize anything.
+// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
+{
+res = mat.coeff(0);
+for(int index = 1; index < size; ++index)
+res = func(res,mat.coeff(index));
+}
+return res;
+}
+};
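A worked example of the index bookkeeping in run() above, assuming packetSize == 4, size == 11 and the first aligned coefficient at offset 1 (numbers hypothetical): coefficient 0 goes through the scalar prologue, coefficients 1..8 through two aligned packet loads, and 9..10 through the scalar epilogue.

#include <cstdio>

int main()
{
  const int size = 11, packetSize = 4, alignedStart = 1;
  const int alignedSize = ((size - alignedStart)/packetSize)*packetSize; // 8
  const int alignedEnd  = alignedStart + alignedSize;                    // 9
  std::printf("prologue [0,%d), packets [%d,%d), epilogue [%d,%d)\n",
              alignedStart, alignedStart, alignedEnd, alignedEnd, size);
  return 0;
}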
+template<typename Func, typename Derived>
+struct ei_redux_impl<Func, Derived, LinearVectorization, CompleteUnrolling>
+{
+typedef typename Derived::Scalar Scalar;
+typedef typename ei_packet_traits<Scalar>::type PacketScalar;
+enum {
+PacketSize = ei_packet_traits<Scalar>::size,
+Size = Derived::SizeAtCompileTime,
+VectorizationSize = (Size / PacketSize) * PacketSize
+};
+EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func)
+{
+Scalar res = func.predux(ei_redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
+if (VectorizationSize != Size)
+res = func(res,ei_redux_novec_unroller<Func, Derived, VectorizationSize, Size-VectorizationSize>::run(mat,func));
+return res;
+}
+};
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
*
* The template parameter \a BinaryOp is the type of the functor \a func which must be
@@ -85,22 +255,20 @@ struct ei_redux_impl<BinaryOp, Derived, Start, Dynamic>
* \sa MatrixBase::sum(), MatrixBase::minCoeff(), MatrixBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
*/
template<typename Derived>
-template<typename BinaryOp>
-typename ei_result_of<BinaryOp(typename ei_traits<Derived>::Scalar)>::type
-MatrixBase<Derived>::redux(const BinaryOp& func) const
+template<typename Func>
+inline typename ei_result_of<Func(typename ei_traits<Derived>::Scalar)>::type
+MatrixBase<Derived>::redux(const Func& func) const
{
-const bool unroll = SizeAtCompileTime * CoeffReadCost
-+ (SizeAtCompileTime-1) * ei_functor_traits<BinaryOp>::Cost
-<= EIGEN_UNROLLING_LIMIT;
+typename Derived::Nested nested(derived());
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
-return ei_redux_impl<BinaryOp, ThisNested, 0, unroll ? int(SizeAtCompileTime) : Dynamic>
-::run(derived(), func);
+return ei_redux_impl<Func, ThisNested>
+::run(nested, func);
}
/** \returns the minimum of all coefficients of *this
*/
template<typename Derived>
-inline typename ei_traits<Derived>::Scalar
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::minCoeff() const
{
return this->redux(Eigen::ei_scalar_min_op<Scalar>());
@@ -109,10 +277,48 @@ MatrixBase<Derived>::minCoeff() const
/** \returns the maximum of all coefficients of *this
*/
template<typename Derived>
-inline typename ei_traits<Derived>::Scalar
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::maxCoeff() const
{
return this->redux(Eigen::ei_scalar_max_op<Scalar>());
}
+/** \returns the sum of all coefficients of *this
+*
+* \sa trace(), prod()
+*/
+template<typename Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
+MatrixBase<Derived>::sum() const
+{
+return this->redux(Eigen::ei_scalar_sum_op<Scalar>());
+}
+/** \returns the product of all coefficients of *this
+*
+* Example: \include MatrixBase_prod.cpp
+* Output: \verbinclude MatrixBase_prod.out
+*
+* \sa sum()
+*/
+template<typename Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
+MatrixBase<Derived>::prod() const
+{
+return this->redux(Eigen::ei_scalar_product_op<Scalar>());
+}
+/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
+*
+* \c *this can be any matrix, not necessarily square.
+*
+* \sa diagonal(), sum()
+*/
+template<typename Derived>
+EIGEN_STRONG_INLINE typename ei_traits<Derived>::Scalar
+MatrixBase<Derived>::trace() const
+{
+return diagonal().sum();
+}
#endif // EIGEN_REDUX_H
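For reference, a minimal use of the reworked entry point (a sketch in Eigen 2 style; USING_PART_OF_NAMESPACE_EIGEN is the usual Eigen 2 convenience macro). With 16 float coefficients and 4-wide packets, ei_redux_traits should select LinearVectorization with CompleteUnrolling here, since Cost = 16*CoeffReadCost + 15*AddCost = 31 stays well below EIGEN_UNROLLING_LIMIT scaled by the packet size:

#include <Eigen/Core>
USING_PART_OF_NAMESPACE_EIGEN

int main()
{
  Matrix4f m = Matrix4f::Ones();
  // redux() with the sum functor is what sum() itself now calls
  float s = m.redux(Eigen::ei_scalar_sum_op<float>());
  return s == 16.f ? 0 : 1;
}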

Eigen/src/Core/Sum.h (deleted)

@@ -1,272 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_SUM_H
#define EIGEN_SUM_H
/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/
template<typename Derived>
struct ei_sum_traits
{
private:
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
};
public:
enum {
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
&& (int(Derived::Flags)&LinearAccessBit)
? LinearVectorization
: NoVectorization
};
private:
enum {
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
};
public:
enum {
Unrolling = Cost <= UnrollingLimit
? CompleteUnrolling
: NoUnrolling
};
};
/***************************************************************************
* Part 2 : unrollers
***************************************************************************/
/*** no vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_sum_novec_unroller
{
enum {
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return ei_sum_novec_unroller<Derived, Start, HalfLength>::run(mat)
+ ei_sum_novec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat);
}
};
template<typename Derived, int Start>
struct ei_sum_novec_unroller<Derived, Start, 1>
{
enum {
col = Start / Derived::RowsAtCompileTime,
row = Start % Derived::RowsAtCompileTime
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return mat.coeff(row, col);
}
};
/*** vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_sum_vec_unroller
{
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return ei_padd(
ei_sum_vec_unroller<Derived, Start, HalfLength>::run(mat),
ei_sum_vec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat) );
}
};
template<typename Derived, int Start>
struct ei_sum_vec_unroller<Derived, Start, 1>
{
enum {
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
row = int(Derived::Flags)&RowMajorBit
? index / int(Derived::ColsAtCompileTime)
: index % Derived::RowsAtCompileTime,
col = int(Derived::Flags)&RowMajorBit
? index % int(Derived::ColsAtCompileTime)
: index / Derived::RowsAtCompileTime,
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return mat.template packet<alignment>(row, col);
}
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
template<typename Derived,
int Vectorization = ei_sum_traits<Derived>::Vectorization,
int Unrolling = ei_sum_traits<Derived>::Unrolling
>
struct ei_sum_impl;
template<typename Derived>
struct ei_sum_impl<Derived, NoVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived& mat)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
Scalar res;
res = mat.coeff(0, 0);
for(int i = 1; i < mat.rows(); ++i)
res += mat.coeff(i, 0);
for(int j = 1; j < mat.cols(); ++j)
for(int i = 0; i < mat.rows(); ++i)
res += mat.coeff(i, j);
return res;
}
};
template<typename Derived>
struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling>
: public ei_sum_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
{};
template<typename Derived>
struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
static Scalar run(const Derived& mat)
{
const int size = mat.size();
const int packetSize = ei_packet_traits<Scalar>::size;
const int alignedStart = (Derived::Flags & AlignedBit)
|| !(Derived::Flags & DirectAccessBit)
? 0
: ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
enum {
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const int alignedSize = ((size-alignedStart)/packetSize)*packetSize;
const int alignedEnd = alignedStart + alignedSize;
Scalar res;
if(alignedSize)
{
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
packet_res = ei_padd(packet_res, mat.template packet<alignment>(index));
res = ei_predux(packet_res);
}
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
res = Scalar(0);
}
for(int index = 0; index < alignedStart; ++index)
res += mat.coeff(index);
for(int index = alignedEnd; index < size; ++index)
res += mat.coeff(index);
return res;
}
};
template<typename Derived>
struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime,
VectorizationSize = (Size / PacketSize) * PacketSize
};
static Scalar run(const Derived& mat)
{
Scalar res = ei_predux(ei_sum_vec_unroller<Derived, 0, Size / PacketSize>::run(mat));
if (VectorizationSize != Size)
res += ei_sum_novec_unroller<Derived, VectorizationSize, Size-VectorizationSize>::run(mat);
return res;
}
};
/***************************************************************************
* Part 4 : implementation of MatrixBase methods
***************************************************************************/
/** \returns the sum of all coefficients of *this
*
* \sa trace(), prod()
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::sum() const
{
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
return ei_sum_impl<ThisNested>::run(derived());
}
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
*
* \c *this can be any matrix, not necessarily square.
*
* \sa diagonal(), sum()
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
return diagonal().sum();
}
#endif // EIGEN_SUM_H

Eigen/src/Core/arch/AltiVec/PacketMath.h

@@ -177,7 +177,7 @@ template<> inline v4f ei_ploadu(const float* from)
return (v4f) vec_perm(MSQ, LSQ, mask); // align the data
}
-template<> inline v4i ei_ploadu(const int* from)
+template<> inline v4i ei_ploadu(const int* from)
{
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
__vector unsigned char MSQ, LSQ;
@@ -198,7 +198,7 @@ template<> inline v4f ei_pset1(const float& from)
return vc;
}
-template<> inline v4i ei_pset1(const int& from)
+template<> inline v4i ei_pset1(const int& from)
{
int __attribute__((aligned(16))) ai[4];
ai[0] = from;
@@ -248,26 +248,28 @@ template<> inline void ei_pstoreu(int* to , const v4i& from )
template<> inline float ei_pfirst(const v4f& a)
{
-float __attribute__((aligned(16))) af[4];
+float EIGEN_ALIGN_128 af[4];
vec_st(a, 0, af);
return af[0];
}
template<> inline int ei_pfirst(const v4i& a)
{
-int __attribute__((aligned(16))) ai[4];
+int EIGEN_ALIGN_128 ai[4];
vec_st(a, 0, ai);
return ai[0];
}
template<> EIGEN_STRONG_INLINE v4f ei_preverse(const v4f& a)
{
-static const __vector unsigned char reverse_mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
+static const __vector unsigned char reverse_mask =
+{12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
return (v4f)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask);
}
template<> EIGEN_STRONG_INLINE v4i ei_preverse(const v4i& a)
{
-static const __vector unsigned char __attribute__((aligned(16))) reverse_mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
+static const __vector unsigned char __attribute__((aligned(16))) reverse_mask =
+{12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
return (v4i)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask);
}
@@ -344,26 +346,59 @@ inline int ei_predux(const v4i& a)
return ei_pfirst(sum);
}
// implement other reduction operators
inline float ei_predux_mul(const v4f& a)
{
-v4f b, sum;
-b = (v4f)vec_sld(a, a, 8);
-sum = ei_pmul(a, b);
-b = (v4f)vec_sld(sum, sum, 4);
-sum = ei_pmul(sum, b);
-return ei_pfirst(sum);
+v4f prod;
+prod = ei_pmul(a, (v4f)vec_sld(a, a, 8));
+return ei_pfirst(ei_pmul(prod, (v4f)vec_sld(prod, prod, 4)));
}
inline int ei_predux_mul(const v4i& a)
{
-v4i b, sum;
-b = (v4i)vec_sld(a, a, 8);
-sum = ei_pmul(a, b);
-b = (v4i)vec_sld(sum, sum, 4);
-sum = ei_pmul(sum, b);
-return ei_pfirst(sum);
+EIGEN_ALIGN_128 int aux[4];
+ei_pstore(aux, a);
+return aux[0] * aux[1] * aux[2] * aux[3];
}
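The float path above folds lanes with vec_sld, a byte rotation: rotating by 8 bytes pairs lane i with lane i+2, and rotating the partial result by 4 bytes pairs the two survivors. A scalar model of that dataflow (standalone sketch, not AltiVec code):

#include <cstdio>

int main()
{
  float a[4] = { 1.f, 2.f, 3.f, 4.f };
  float t[4], r[4];
  for (int i = 0; i < 4; ++i) t[i] = a[i] * a[(i + 2) % 4]; // models pmul(a, vec_sld(a, a, 8))
  for (int i = 0; i < 4; ++i) r[i] = t[i] * t[(i + 1) % 4]; // models pmul(t, vec_sld(t, t, 4))
  std::printf("%g\n", r[0]); // prints: 24, the product of all four lanes
  return 0;
}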
+inline float ei_predux_min(const v4f& a)
+{
+EIGEN_ALIGN_128 float aux[4];
+ei_pstore(aux, a);
+register float aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+register float aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+return aux0<aux2 ? aux0 : aux2;
+}
+inline int ei_predux_min(const v4i& a)
+{
+EIGEN_ALIGN_128 int aux[4];
+ei_pstore(aux, a);
+register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+return aux0<aux2 ? aux0 : aux2;
+}
+inline float ei_predux_max(const v4f& a)
+{
+EIGEN_ALIGN_128 float aux[4];
+ei_pstore(aux, a);
+register float aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+register float aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+return aux0>aux2 ? aux0 : aux2;
+}
+inline int ei_predux_max(const v4i& a)
+{
+EIGEN_ALIGN_128 int aux[4];
+ei_pstore(aux, a);
+register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+return aux0>aux2 ? aux0 : aux2;
+}
template<int Offset>
struct ei_palign_impl<Offset, v4f>
{

Eigen/src/Core/arch/SSE/PacketMath.h

@@ -53,6 +53,7 @@ template<> EIGEN_STRONG_INLINE __m128 ei_pmul<__m128>(const __m128& a, const _
template<> EIGEN_STRONG_INLINE __m128d ei_pmul<__m128d>(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE __m128i ei_pmul<__m128i>(const __m128i& a, const __m128i& b)
{
+// this version is very slightly faster than 4 scalar products
return _mm_or_si128(
_mm_and_si128(
_mm_mul_epu32(a,b),
@@ -76,18 +77,18 @@ template<> EIGEN_STRONG_INLINE __m128i ei_pmadd(const __m128i& a, const __m128i&
template<> EIGEN_STRONG_INLINE __m128 ei_pmin<__m128>(const __m128& a, const __m128& b) { return _mm_min_ps(a,b); }
template<> EIGEN_STRONG_INLINE __m128d ei_pmin<__m128d>(const __m128d& a, const __m128d& b) { return _mm_min_pd(a,b); }
-// FIXME this vectorized min operator is likely to be slower than the standard one
template<> EIGEN_STRONG_INLINE __m128i ei_pmin<__m128i>(const __m128i& a, const __m128i& b)
{
+// after some bench, this version *is* faster than a scalar implementation
__m128i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
}
template<> EIGEN_STRONG_INLINE __m128 ei_pmax<__m128>(const __m128& a, const __m128& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE __m128d ei_pmax<__m128d>(const __m128d& a, const __m128d& b) { return _mm_max_pd(a,b); }
-// FIXME this vectorized max operator is likely to be slower than the standard one
template<> EIGEN_STRONG_INLINE __m128i ei_pmax<__m128i>(const __m128i& a, const __m128i& b)
{
+// after some bench, this version *is* faster than a scalar implementation
__m128i mask = _mm_cmpgt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
}
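The integer pmin/pmax above build a branchless select from a compare mask: (mask & a) | (~mask & b), where _mm_cmplt_epi32/_mm_cmpgt_epi32 produce an all-ones mask in the lanes where the condition holds. The same trick modeled on a single scalar lane (standalone sketch, not SSE code):

#include <cstdio>
#include <stdint.h>

int main()
{
  int32_t a = 3, b = 7;
  int32_t mask = -(int32_t)(a < b);         // all-ones if a < b, like _mm_cmplt_epi32
  int32_t minv = (mask & a) | (~mask & b);  // and / andnot / or, as in ei_pmin above
  std::printf("%d\n", minv);                // prints: 3
  return 0;
}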
@@ -216,6 +217,7 @@ template<> EIGEN_STRONG_INLINE __m128i ei_preduxp<__m128i>(const __m128i* vecs)
// Other reduction functions:
+// mul
template<> EIGEN_STRONG_INLINE float ei_predux_mul<__m128>(const __m128& a)
{
__m128 tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
@@ -227,8 +229,54 @@ template<> EIGEN_STRONG_INLINE double ei_predux_mul<__m128d>(const __m128d& a)
}
template<> EIGEN_STRONG_INLINE int ei_predux_mul<__m128i>(const __m128i& a)
{
-__m128i tmp = ei_pmul(a, _mm_unpackhi_epi64(a,a));
-return ei_pfirst(tmp) * ei_pfirst(_mm_shuffle_epi32(tmp, 1));
+// after some experiments, it seems this is the fastest way to implement it
+// for GCC (e.g., reusing ei_pmul is very slow!)
+// TODO try to call _mm_mul_epu32 directly
+EIGEN_ALIGN_128 int aux[4];
+ei_pstore(aux, a);
+return (aux[0] * aux[1]) * (aux[2] * aux[3]);
}
+// min
+template<> EIGEN_STRONG_INLINE float ei_predux_min<__m128>(const __m128& a)
+{
+__m128 tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
+return ei_pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double ei_predux_min<__m128d>(const __m128d& a)
+{
+return ei_pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int ei_predux_min<__m128i>(const __m128i& a)
+{
+// after some experiments, it seems this is the fastest way to implement it
+// for GCC (e.g., it does not like using std::min after the ei_pstore!!)
+EIGEN_ALIGN_128 int aux[4];
+ei_pstore(aux, a);
+register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+return aux0<aux2 ? aux0 : aux2;
+}
+// max
+template<> EIGEN_STRONG_INLINE float ei_predux_max<__m128>(const __m128& a)
+{
+__m128 tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
+return ei_pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double ei_predux_max<__m128d>(const __m128d& a)
+{
+return ei_pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int ei_predux_max<__m128i>(const __m128i& a)
+{
+// after some experiments, it seems this is the fastest way to implement it
+// for GCC (e.g., it does not like using std::max after the ei_pstore!!)
+EIGEN_ALIGN_128 int aux[4];
+ei_pstore(aux, a);
+register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+return aux0>aux2 ? aux0 : aux2;
+}
#if (defined __GNUC__)

test/CMakeLists.txt

@@ -111,7 +111,7 @@ ei_add_test(vectorization_logic)
ei_add_test(basicstuff)
ei_add_test(linearstructure)
ei_add_test(cwiseop)
-ei_add_test(sum)
+ei_add_test(redux)
ei_add_test(product_small)
ei_add_test(product_large ${EI_OFLAG})
ei_add_test(adjoint)
@@ -149,7 +149,6 @@ ei_add_test(sparse_basic)
ei_add_test(sparse_product)
ei_add_test(sparse_solvers " " "${SPARSE_LIBS}")
ei_add_test(reverse)
-ei_add_test(prod)
# print a summary of the different options
message("************************************************************")

test/packetmath.cpp

@@ -129,6 +129,16 @@ template<typename Scalar> void packetmath()
for (int i=0; i<PacketSize; ++i)
ref[0] *= data1[i];
VERIFY(ei_isApprox(ref[0], ei_predux_mul(ei_pload(data1))) && "ei_predux_mul");
+ref[0] = data1[0];
+for (int i=0; i<PacketSize; ++i)
+ref[0] = std::min(ref[0],data1[i]);
+VERIFY(ei_isApprox(ref[0], ei_predux_min(ei_pload(data1))) && "ei_predux_min");
+ref[0] = data1[0];
+for (int i=0; i<PacketSize; ++i)
+ref[0] = std::max(ref[0],data1[i]);
+VERIFY(ei_isApprox(ref[0], ei_predux_max(ei_pload(data1))) && "ei_predux_max");
for (int j=0; j<PacketSize; ++j)
{

test/prod.cpp (deleted)

@@ -1,86 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#include "main.h"
template<typename MatrixType> void matrixProd(const MatrixType& m)
{
typedef typename MatrixType::Scalar Scalar;
int rows = m.rows();
int cols = m.cols();
MatrixType m1 = MatrixType::Random(rows, cols);
VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).prod(), Scalar(1));
VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).prod(), Scalar(1));
Scalar x = Scalar(1);
for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) x *= m1(i,j);
VERIFY_IS_APPROX(m1.prod(), x);
}
template<typename VectorType> void vectorProd(const VectorType& w)
{
typedef typename VectorType::Scalar Scalar;
int size = w.size();
VectorType v = VectorType::Random(size);
for(int i = 1; i < size; i++)
{
Scalar s = Scalar(1);
for(int j = 0; j < i; j++) s *= v[j];
VERIFY_IS_APPROX(s, v.start(i).prod());
}
for(int i = 0; i < size-1; i++)
{
Scalar s = Scalar(1);
for(int j = i; j < size; j++) s *= v[j];
VERIFY_IS_APPROX(s, v.end(size-i).prod());
}
for(int i = 0; i < size/2; i++)
{
Scalar s = Scalar(1);
for(int j = i; j < size-i; j++) s *= v[j];
VERIFY_IS_APPROX(s, v.segment(i, size-2*i).prod());
}
}
void test_prod()
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( matrixProd(Matrix<float, 1, 1>()) );
CALL_SUBTEST( matrixProd(Matrix2f()) );
CALL_SUBTEST( matrixProd(Matrix4d()) );
CALL_SUBTEST( matrixProd(MatrixXcf(3, 3)) );
CALL_SUBTEST( matrixProd(MatrixXf(8, 12)) );
CALL_SUBTEST( matrixProd(MatrixXi(8, 12)) );
}
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( vectorProd(VectorXf(5)) );
CALL_SUBTEST( vectorProd(VectorXd(10)) );
CALL_SUBTEST( vectorProd(VectorXf(33)) );
}
}

test/redux.cpp (new file, 127 lines)

@@ -0,0 +1,127 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#include "main.h"
template<typename MatrixType> void matrixRedux(const MatrixType& m)
{
typedef typename MatrixType::Scalar Scalar;
int rows = m.rows();
int cols = m.cols();
MatrixType m1 = MatrixType::Random(rows, cols);
VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1));
VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).sum(), Scalar(float(rows*cols))); // the float() here to shut up excessive MSVC warning about int->complex conversion being lossy
Scalar s(0), p(1), minc(ei_real(m1.coeff(0))), maxc(ei_real(m1.coeff(0)));
for(int j = 0; j < cols; j++)
for(int i = 0; i < rows; i++)
{
s += m1(i,j);
p *= m1(i,j);
minc = std::min(ei_real(minc), ei_real(m1(i,j)));
maxc = std::max(ei_real(maxc), ei_real(m1(i,j)));
}
VERIFY_IS_APPROX(m1.sum(), s);
VERIFY_IS_APPROX(m1.prod(), p);
VERIFY_IS_APPROX(m1.real().minCoeff(), ei_real(minc));
VERIFY_IS_APPROX(m1.real().maxCoeff(), ei_real(maxc));
}
template<typename VectorType> void vectorRedux(const VectorType& w)
{
typedef typename VectorType::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
int size = w.size();
VectorType v = VectorType::Random(size);
for(int i = 1; i < size; i++)
{
Scalar s(0), p(1);
RealScalar minc(ei_real(v.coeff(0))), maxc(ei_real(v.coeff(0)));
for(int j = 0; j < i; j++)
{
s += v[j];
p *= v[j];
minc = std::min(minc, ei_real(v[j]));
maxc = std::max(maxc, ei_real(v[j]));
}
VERIFY_IS_APPROX(s, v.start(i).sum());
VERIFY_IS_APPROX(p, v.start(i).prod());
VERIFY_IS_APPROX(minc, v.real().start(i).minCoeff());
VERIFY_IS_APPROX(maxc, v.real().start(i).maxCoeff());
}
for(int i = 0; i < size-1; i++)
{
Scalar s(0), p(1);
RealScalar minc(ei_real(v.coeff(i))), maxc(ei_real(v.coeff(i)));
for(int j = i; j < size; j++)
{
s += v[j];
p *= v[j];
minc = std::min(minc, ei_real(v[j]));
maxc = std::max(maxc, ei_real(v[j]));
}
VERIFY_IS_APPROX(s, v.end(size-i).sum());
VERIFY_IS_APPROX(p, v.end(size-i).prod());
VERIFY_IS_APPROX(minc, v.real().end(size-i).minCoeff());
VERIFY_IS_APPROX(maxc, v.real().end(size-i).maxCoeff());
}
for(int i = 0; i < size/2; i++)
{
Scalar s(0), p(1);
RealScalar minc(ei_real(v.coeff(i))), maxc(ei_real(v.coeff(i)));
for(int j = i; j < size-i; j++)
{
s += v[j];
p *= v[j];
minc = std::min(minc, ei_real(v[j]));
maxc = std::max(maxc, ei_real(v[j]));
}
VERIFY_IS_APPROX(s, v.segment(i, size-2*i).sum());
VERIFY_IS_APPROX(p, v.segment(i, size-2*i).prod());
VERIFY_IS_APPROX(minc, v.real().segment(i, size-2*i).minCoeff());
VERIFY_IS_APPROX(maxc, v.real().segment(i, size-2*i).maxCoeff());
}
}
void test_redux()
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( matrixRedux(Matrix<float, 1, 1>()) );
CALL_SUBTEST( matrixRedux(Matrix2f()) );
CALL_SUBTEST( matrixRedux(Matrix4d()) );
CALL_SUBTEST( matrixRedux(MatrixXcf(3, 3)) );
CALL_SUBTEST( matrixRedux(MatrixXd(8, 12)) );
CALL_SUBTEST( matrixRedux(MatrixXi(8, 12)) );
}
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( vectorRedux(VectorXf(5)) );
CALL_SUBTEST( vectorRedux(VectorXd(10)) );
CALL_SUBTEST( vectorRedux(VectorXf(33)) );
}
}

test/sum.cpp (deleted)

@@ -1,86 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#include "main.h"
template<typename MatrixType> void matrixSum(const MatrixType& m)
{
typedef typename MatrixType::Scalar Scalar;
int rows = m.rows();
int cols = m.cols();
MatrixType m1 = MatrixType::Random(rows, cols);
VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1));
VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).sum(), Scalar(float(rows*cols))); // the float() here to shut up excessive MSVC warning about int->complex conversion being lossy
Scalar x = Scalar(0);
for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) x += m1(i,j);
VERIFY_IS_APPROX(m1.sum(), x);
}
template<typename VectorType> void vectorSum(const VectorType& w)
{
typedef typename VectorType::Scalar Scalar;
int size = w.size();
VectorType v = VectorType::Random(size);
for(int i = 1; i < size; i++)
{
Scalar s = Scalar(0);
for(int j = 0; j < i; j++) s += v[j];
VERIFY_IS_APPROX(s, v.start(i).sum());
}
for(int i = 0; i < size-1; i++)
{
Scalar s = Scalar(0);
for(int j = i; j < size; j++) s += v[j];
VERIFY_IS_APPROX(s, v.end(size-i).sum());
}
for(int i = 0; i < size/2; i++)
{
Scalar s = Scalar(0);
for(int j = i; j < size-i; j++) s += v[j];
VERIFY_IS_APPROX(s, v.segment(i, size-2*i).sum());
}
}
void test_sum()
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( matrixSum(Matrix<float, 1, 1>()) );
CALL_SUBTEST( matrixSum(Matrix2f()) );
CALL_SUBTEST( matrixSum(Matrix4d()) );
CALL_SUBTEST( matrixSum(MatrixXcf(3, 3)) );
CALL_SUBTEST( matrixSum(MatrixXf(8, 12)) );
CALL_SUBTEST( matrixSum(MatrixXi(8, 12)) );
}
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( vectorSum(VectorXf(5)) );
CALL_SUBTEST( vectorSum(VectorXd(10)) );
CALL_SUBTEST( vectorSum(VectorXf(33)) );
}
}