From 51c991af459b86aab4a0a9575edba378e20e8621 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 12 Feb 2009 15:18:59 +0000 Subject: [PATCH] * exit Sum.h, exit Prod.h, welcome vectorization of redux() ! * add vectorization for minCoeff and maxCoeff --- Eigen/Core | 2 - Eigen/src/Core/Functors.h | 16 +- Eigen/src/Core/GenericPacketMath.h | 8 + Eigen/src/Core/Prod.h | 262 ---------------------- Eigen/src/Core/Redux.h | 258 ++++++++++++++++++--- Eigen/src/Core/Sum.h | 272 ----------------------- Eigen/src/Core/arch/AltiVec/PacketMath.h | 71 ++++-- Eigen/src/Core/arch/SSE/PacketMath.h | 56 ++++- test/CMakeLists.txt | 3 +- test/packetmath.cpp | 10 + test/prod.cpp | 86 ------- test/redux.cpp | 127 +++++++++++ test/sum.cpp | 86 ------- 13 files changed, 497 insertions(+), 760 deletions(-) delete mode 100644 Eigen/src/Core/Prod.h delete mode 100644 Eigen/src/Core/Sum.h delete mode 100644 test/prod.cpp create mode 100644 test/redux.cpp delete mode 100644 test/sum.cpp diff --git a/Eigen/Core b/Eigen/Core index ef9ff9a91..b99f49a01 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -138,8 +138,6 @@ namespace Eigen { #include "src/Core/Transpose.h" #include "src/Core/DiagonalMatrix.h" #include "src/Core/DiagonalCoeffs.h" -#include "src/Core/Sum.h" -#include "src/Core/Prod.h" #include "src/Core/Redux.h" #include "src/Core/Visitor.h" #include "src/Core/Fuzzy.h" diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index 8b9516ae6..57a055662 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -37,6 +37,9 @@ template struct ei_scalar_sum_op EIGEN_EMPTY_STRUCT { template EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const { return ei_padd(a,b); } + template + EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const + { return ei_predux(a); } }; template struct ei_functor_traits > { @@ -56,6 +59,9 @@ template struct ei_scalar_product_op EIGEN_EMPTY_STRUCT { template EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const { return ei_pmul(a,b); } + template + EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const + { return ei_predux_mul(a); } }; template struct ei_functor_traits > { @@ -75,6 +81,9 @@ template struct ei_scalar_min_op EIGEN_EMPTY_STRUCT { template EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const { return ei_pmin(a,b); } + template + EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const + { return ei_predux_min(a); } }; template struct ei_functor_traits > { @@ -94,6 +103,9 @@ template struct ei_scalar_max_op EIGEN_EMPTY_STRUCT { template EIGEN_STRONG_INLINE const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const { return ei_pmax(a,b); } + template + EIGEN_STRONG_INLINE const Scalar predux(const PacketScalar& a) const + { return ei_predux_max(a); } }; template struct ei_functor_traits > { @@ -123,7 +135,7 @@ template struct ei_scalar_hypot_op EIGEN_EMPTY_STRUCT { }; template struct ei_functor_traits > { - enum { Cost = 5 * NumTraits::MulCost }; + enum { Cost = 5 * NumTraits::MulCost, PacketAccess=0 }; }; // other binary functors: @@ -197,7 +209,7 @@ struct ei_functor_traits > { enum { Cost = NumTraits::AddCost, - PacketAccess = false // this could actually be vectorized with SSSE3. + PacketAccess = false // FIXME this could actually be vectorized with SSSE3. }; }; diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 84326b3a7..10d39b68b 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -100,6 +100,14 @@ template inline typename ei_unpacket_traits::type ei_pr template inline typename ei_unpacket_traits::type ei_predux_mul(const Packet& a) { return a; } +/** \internal \returns the min of the elements of \a a*/ +template inline typename ei_unpacket_traits::type ei_predux_min(const Packet& a) +{ return a; } + +/** \internal \returns the max of the elements of \a a*/ +template inline typename ei_unpacket_traits::type ei_predux_max(const Packet& a) +{ return a; } + /** \internal \returns the reversed elements of \a a*/ template inline Packet ei_preverse(const Packet& a) { return a; } diff --git a/Eigen/src/Core/Prod.h b/Eigen/src/Core/Prod.h deleted file mode 100644 index 8ad22f579..000000000 --- a/Eigen/src/Core/Prod.h +++ /dev/null @@ -1,262 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . - -#ifndef EIGEN_PROD_H -#define EIGEN_PROD_H - -/*************************************************************************** -* Part 1 : the logic deciding a strategy for vectorization and unrolling -***************************************************************************/ - -template -struct ei_prod_traits -{ -private: - enum { - PacketSize = ei_packet_traits::size - }; - -public: - enum { - Vectorization = (int(Derived::Flags)&ActualPacketAccessBit) - && (int(Derived::Flags)&LinearAccessBit) - ? LinearVectorization - : NoVectorization - }; - -private: - enum { - Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost - + (Derived::SizeAtCompileTime-1) * NumTraits::MulCost, - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)) - }; - -public: - enum { - Unrolling = Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling - }; -}; - -/*************************************************************************** -* Part 2 : unrollers -***************************************************************************/ - -/*** no vectorization ***/ - -template -struct ei_prod_novec_unroller -{ - enum { - HalfLength = Length/2 - }; - - typedef typename Derived::Scalar Scalar; - - inline static Scalar run(const Derived &mat) - { - return ei_prod_novec_unroller::run(mat) - * ei_prod_novec_unroller::run(mat); - } -}; - -template -struct ei_prod_novec_unroller -{ - enum { - col = Start / Derived::RowsAtCompileTime, - row = Start % Derived::RowsAtCompileTime - }; - - typedef typename Derived::Scalar Scalar; - - inline static Scalar run(const Derived &mat) - { - return mat.coeff(row, col); - } -}; - -/*** vectorization ***/ - -template -struct ei_prod_vec_unroller -{ - enum { - PacketSize = ei_packet_traits::size, - HalfLength = Length/2 - }; - - typedef typename Derived::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - inline static PacketScalar run(const Derived &mat) - { - return ei_pmul( - ei_prod_vec_unroller::run(mat), - ei_prod_vec_unroller::run(mat) ); - } -}; - -template -struct ei_prod_vec_unroller -{ - enum { - index = Start * ei_packet_traits::size, - row = int(Derived::Flags)&RowMajorBit - ? index / int(Derived::ColsAtCompileTime) - : index % Derived::RowsAtCompileTime, - col = int(Derived::Flags)&RowMajorBit - ? index % int(Derived::ColsAtCompileTime) - : index / Derived::RowsAtCompileTime, - alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned - }; - - typedef typename Derived::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - inline static PacketScalar run(const Derived &mat) - { - return mat.template packet(row, col); - } -}; - -/*************************************************************************** -* Part 3 : implementation of all cases -***************************************************************************/ - -template::Vectorization, - int Unrolling = ei_prod_traits::Unrolling -> -struct ei_prod_impl; - -template -struct ei_prod_impl -{ - typedef typename Derived::Scalar Scalar; - static Scalar run(const Derived& mat) - { - ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix"); - Scalar res; - res = mat.coeff(0, 0); - for(int i = 1; i < mat.rows(); ++i) - res *= mat.coeff(i, 0); - for(int j = 1; j < mat.cols(); ++j) - for(int i = 0; i < mat.rows(); ++i) - res *= mat.coeff(i, j); - return res; - } -}; - -template -struct ei_prod_impl - : public ei_prod_novec_unroller -{}; - -template -struct ei_prod_impl -{ - typedef typename Derived::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - static Scalar run(const Derived& mat) - { - const int size = mat.size(); - const int packetSize = ei_packet_traits::size; - const int alignedStart = (Derived::Flags & AlignedBit) - || !(Derived::Flags & DirectAccessBit) - ? 0 - : ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size); - enum { - alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit) - ? Aligned : Unaligned - }; - const int alignedSize = ((size-alignedStart)/packetSize)*packetSize; - const int alignedEnd = alignedStart + alignedSize; - Scalar res; - - if(alignedSize) - { - PacketScalar packet_res = mat.template packet(alignedStart); - for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize) - packet_res = ei_pmul(packet_res, mat.template packet(index)); - res = ei_predux_mul(packet_res); - } - else // too small to vectorize anything. - // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. - { - res = Scalar(1); - } - - for(int index = 0; index < alignedStart; ++index) - res *= mat.coeff(index); - - for(int index = alignedEnd; index < size; ++index) - res *= mat.coeff(index); - - return res; - } -}; - -template -struct ei_prod_impl -{ - typedef typename Derived::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - enum { - PacketSize = ei_packet_traits::size, - Size = Derived::SizeAtCompileTime, - VectorizationSize = (Size / PacketSize) * PacketSize - }; - static Scalar run(const Derived& mat) - { - Scalar res = ei_predux_mul(ei_prod_vec_unroller::run(mat)); - if (VectorizationSize != Size) - res *= ei_prod_novec_unroller::run(mat); - return res; - } -}; - -/*************************************************************************** -* Part 4 : implementation of MatrixBase methods -***************************************************************************/ - -/** \returns the product of all coefficients of *this - * - * Example: \include MatrixBase_prod.cpp - * Output: \verbinclude MatrixBase_prod.out - * - * \sa sum() - */ -template -inline typename ei_traits::Scalar -MatrixBase::prod() const -{ - typedef typename ei_cleantype::type ThisNested; - return ei_prod_impl::run(derived()); -} - -#endif // EIGEN_PROD_H diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 5b5907073..889a12d30 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -26,48 +26,147 @@ #ifndef EIGEN_REDUX_H #define EIGEN_REDUX_H -template -struct ei_redux_impl +// TODO +// * implement other kind of vectorization +// * factorize code + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for vectorization and unrolling +***************************************************************************/ + +template +struct ei_redux_traits +{ +private: + enum { + PacketSize = ei_packet_traits::size + }; + +public: + enum { + Vectorization = (int(Derived::Flags)&ActualPacketAccessBit) + && (int(Derived::Flags)&LinearAccessBit) + && (ei_functor_traits::PacketAccess) + ? LinearVectorization + : NoVectorization + }; + +private: + enum { + Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost + + (Derived::SizeAtCompileTime-1) * NumTraits::AddCost, + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)) + }; + +public: + enum { + Unrolling = Cost <= UnrollingLimit + ? CompleteUnrolling + : NoUnrolling + }; +}; + +/*************************************************************************** +* Part 2 : unrollers +***************************************************************************/ + +/*** no vectorization ***/ + +template +struct ei_redux_novec_unroller { enum { HalfLength = Length/2 }; - typedef typename ei_result_of::type Scalar; + typedef typename Derived::Scalar Scalar; - static Scalar run(const Derived &mat, const BinaryOp& func) + EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func& func) { - return func( - ei_redux_impl::run(mat, func), - ei_redux_impl::run(mat, func)); + return func(ei_redux_novec_unroller::run(mat,func), + ei_redux_novec_unroller::run(mat,func)); } }; -template -struct ei_redux_impl +template +struct ei_redux_novec_unroller { enum { col = Start / Derived::RowsAtCompileTime, row = Start % Derived::RowsAtCompileTime }; - typedef typename ei_result_of::type Scalar; + typedef typename Derived::Scalar Scalar; - static Scalar run(const Derived &mat, const BinaryOp &) + EIGEN_STRONG_INLINE static Scalar run(const Derived &mat, const Func&) { return mat.coeff(row, col); } }; -template -struct ei_redux_impl +/*** vectorization ***/ + +template +struct ei_redux_vec_unroller { - typedef typename ei_result_of::type Scalar; - static Scalar run(const Derived& mat, const BinaryOp& func) + enum { + PacketSize = ei_packet_traits::size, + HalfLength = Length/2 + }; + + typedef typename Derived::Scalar Scalar; + typedef typename ei_packet_traits::type PacketScalar; + + EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func& func) + { + return func.packetOp( + ei_redux_vec_unroller::run(mat,func), + ei_redux_vec_unroller::run(mat,func) ); + } +}; + +template +struct ei_redux_vec_unroller +{ + enum { + index = Start * ei_packet_traits::size, + row = int(Derived::Flags)&RowMajorBit + ? index / int(Derived::ColsAtCompileTime) + : index % Derived::RowsAtCompileTime, + col = int(Derived::Flags)&RowMajorBit + ? index % int(Derived::ColsAtCompileTime) + : index / Derived::RowsAtCompileTime, + alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned + }; + + typedef typename Derived::Scalar Scalar; + typedef typename ei_packet_traits::type PacketScalar; + + EIGEN_STRONG_INLINE static PacketScalar run(const Derived &mat, const Func&) + { + return mat.template packet(row, col); + } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + +template::Vectorization, + int Unrolling = ei_redux_traits::Unrolling +> +struct ei_redux_impl; + +template +struct ei_redux_impl +{ + typedef typename Derived::Scalar Scalar; + static Scalar run(const Derived& mat, const Func& func) { ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix"); Scalar res; - res = mat.coeff(0,0); + res = mat.coeff(0, 0); for(int i = 1; i < mat.rows(); ++i) res = func(res, mat.coeff(i, 0)); for(int j = 1; j < mat.cols(); ++j) @@ -77,6 +176,77 @@ struct ei_redux_impl } }; +template +struct ei_redux_impl + : public ei_redux_novec_unroller +{}; + +template +struct ei_redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename ei_packet_traits::type PacketScalar; + + static Scalar run(const Derived& mat, const Func& func) + { + const int size = mat.size(); + const int packetSize = ei_packet_traits::size; + const int alignedStart = (Derived::Flags & AlignedBit) + || !(Derived::Flags & DirectAccessBit) + ? 0 + : ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size); + enum { + alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit) + ? Aligned : Unaligned + }; + const int alignedSize = ((size-alignedStart)/packetSize)*packetSize; + const int alignedEnd = alignedStart + alignedSize; + Scalar res; + if(alignedSize) + { + PacketScalar packet_res = mat.template packet(alignedStart); + for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize) + packet_res = func.packetOp(packet_res, mat.template packet(index)); + res = func.predux(packet_res); + + for(int index = 0; index < alignedStart; ++index) + res = func(res,mat.coeff(index)); + + for(int index = alignedEnd; index < size; ++index) + res = func(res,mat.coeff(index)); + } + else // too small to vectorize anything. + // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. + { + res = mat.coeff(0); + for(int index = 1; index < size; ++index) + res = func(res,mat.coeff(index)); + } + + return res; + } +}; + +template +struct ei_redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename ei_packet_traits::type PacketScalar; + enum { + PacketSize = ei_packet_traits::size, + Size = Derived::SizeAtCompileTime, + VectorizationSize = (Size / PacketSize) * PacketSize + }; + EIGEN_STRONG_INLINE static Scalar run(const Derived& mat, const Func& func) + { + Scalar res = func.predux(ei_redux_vec_unroller::run(mat,func)); + if (VectorizationSize != Size) + res = func(res,ei_redux_novec_unroller::run(mat,func)); + return res; + } +}; + + /** \returns the result of a full redux operation on the whole matrix or vector using \a func * * The template parameter \a BinaryOp is the type of the functor \a func which must be @@ -85,22 +255,20 @@ struct ei_redux_impl * \sa MatrixBase::sum(), MatrixBase::minCoeff(), MatrixBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() */ template -template -typename ei_result_of::Scalar)>::type -MatrixBase::redux(const BinaryOp& func) const +template +inline typename ei_result_of::Scalar)>::type +MatrixBase::redux(const Func& func) const { - const bool unroll = SizeAtCompileTime * CoeffReadCost - + (SizeAtCompileTime-1) * ei_functor_traits::Cost - <= EIGEN_UNROLLING_LIMIT; + typename Derived::Nested nested(derived()); typedef typename ei_cleantype::type ThisNested; - return ei_redux_impl - ::run(derived(), func); + return ei_redux_impl + ::run(nested, func); } /** \returns the minimum of all coefficients of *this */ template -inline typename ei_traits::Scalar +EIGEN_STRONG_INLINE typename ei_traits::Scalar MatrixBase::minCoeff() const { return this->redux(Eigen::ei_scalar_min_op()); @@ -109,10 +277,48 @@ MatrixBase::minCoeff() const /** \returns the maximum of all coefficients of *this */ template -inline typename ei_traits::Scalar +EIGEN_STRONG_INLINE typename ei_traits::Scalar MatrixBase::maxCoeff() const { return this->redux(Eigen::ei_scalar_max_op()); } +/** \returns the sum of all coefficients of *this + * + * \sa trace(), prod() + */ +template +EIGEN_STRONG_INLINE typename ei_traits::Scalar +MatrixBase::sum() const +{ + return this->redux(Eigen::ei_scalar_sum_op()); +} + +/** \returns the product of all coefficients of *this + * + * Example: \include MatrixBase_prod.cpp + * Output: \verbinclude MatrixBase_prod.out + * + * \sa sum() + */ +template +EIGEN_STRONG_INLINE typename ei_traits::Scalar +MatrixBase::prod() const +{ + return this->redux(Eigen::ei_scalar_product_op()); +} + +/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. + * + * \c *this can be any matrix, not necessarily square. + * + * \sa diagonal(), sum() + */ +template +EIGEN_STRONG_INLINE typename ei_traits::Scalar +MatrixBase::trace() const +{ + return diagonal().sum(); +} + #endif // EIGEN_REDUX_H diff --git a/Eigen/src/Core/Sum.h b/Eigen/src/Core/Sum.h deleted file mode 100644 index 61ff7e4a6..000000000 --- a/Eigen/src/Core/Sum.h +++ /dev/null @@ -1,272 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . - -#ifndef EIGEN_SUM_H -#define EIGEN_SUM_H - -/*************************************************************************** -* Part 1 : the logic deciding a strategy for vectorization and unrolling -***************************************************************************/ - -template -struct ei_sum_traits -{ -private: - enum { - PacketSize = ei_packet_traits::size - }; - -public: - enum { - Vectorization = (int(Derived::Flags)&ActualPacketAccessBit) - && (int(Derived::Flags)&LinearAccessBit) - ? LinearVectorization - : NoVectorization - }; - -private: - enum { - Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost - + (Derived::SizeAtCompileTime-1) * NumTraits::AddCost, - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)) - }; - -public: - enum { - Unrolling = Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling - }; -}; - -/*************************************************************************** -* Part 2 : unrollers -***************************************************************************/ - -/*** no vectorization ***/ - -template -struct ei_sum_novec_unroller -{ - enum { - HalfLength = Length/2 - }; - - typedef typename Derived::Scalar Scalar; - - inline static Scalar run(const Derived &mat) - { - return ei_sum_novec_unroller::run(mat) - + ei_sum_novec_unroller::run(mat); - } -}; - -template -struct ei_sum_novec_unroller -{ - enum { - col = Start / Derived::RowsAtCompileTime, - row = Start % Derived::RowsAtCompileTime - }; - - typedef typename Derived::Scalar Scalar; - - inline static Scalar run(const Derived &mat) - { - return mat.coeff(row, col); - } -}; - -/*** vectorization ***/ - -template -struct ei_sum_vec_unroller -{ - enum { - PacketSize = ei_packet_traits::size, - HalfLength = Length/2 - }; - - typedef typename Derived::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - inline static PacketScalar run(const Derived &mat) - { - return ei_padd( - ei_sum_vec_unroller::run(mat), - ei_sum_vec_unroller::run(mat) ); - } -}; - -template -struct ei_sum_vec_unroller -{ - enum { - index = Start * ei_packet_traits::size, - row = int(Derived::Flags)&RowMajorBit - ? index / int(Derived::ColsAtCompileTime) - : index % Derived::RowsAtCompileTime, - col = int(Derived::Flags)&RowMajorBit - ? index % int(Derived::ColsAtCompileTime) - : index / Derived::RowsAtCompileTime, - alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned - }; - - typedef typename Derived::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - inline static PacketScalar run(const Derived &mat) - { - return mat.template packet(row, col); - } -}; - -/*************************************************************************** -* Part 3 : implementation of all cases -***************************************************************************/ - -template::Vectorization, - int Unrolling = ei_sum_traits::Unrolling -> -struct ei_sum_impl; - -template -struct ei_sum_impl -{ - typedef typename Derived::Scalar Scalar; - static Scalar run(const Derived& mat) - { - ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix"); - Scalar res; - res = mat.coeff(0, 0); - for(int i = 1; i < mat.rows(); ++i) - res += mat.coeff(i, 0); - for(int j = 1; j < mat.cols(); ++j) - for(int i = 0; i < mat.rows(); ++i) - res += mat.coeff(i, j); - return res; - } -}; - -template -struct ei_sum_impl - : public ei_sum_novec_unroller -{}; - -template -struct ei_sum_impl -{ - typedef typename Derived::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - static Scalar run(const Derived& mat) - { - const int size = mat.size(); - const int packetSize = ei_packet_traits::size; - const int alignedStart = (Derived::Flags & AlignedBit) - || !(Derived::Flags & DirectAccessBit) - ? 0 - : ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size); - enum { - alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit) - ? Aligned : Unaligned - }; - const int alignedSize = ((size-alignedStart)/packetSize)*packetSize; - const int alignedEnd = alignedStart + alignedSize; - Scalar res; - - if(alignedSize) - { - PacketScalar packet_res = mat.template packet(alignedStart); - for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize) - packet_res = ei_padd(packet_res, mat.template packet(index)); - res = ei_predux(packet_res); - } - else // too small to vectorize anything. - // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. - { - res = Scalar(0); - } - - for(int index = 0; index < alignedStart; ++index) - res += mat.coeff(index); - - for(int index = alignedEnd; index < size; ++index) - res += mat.coeff(index); - - return res; - } -}; - -template -struct ei_sum_impl -{ - typedef typename Derived::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - enum { - PacketSize = ei_packet_traits::size, - Size = Derived::SizeAtCompileTime, - VectorizationSize = (Size / PacketSize) * PacketSize - }; - static Scalar run(const Derived& mat) - { - Scalar res = ei_predux(ei_sum_vec_unroller::run(mat)); - if (VectorizationSize != Size) - res += ei_sum_novec_unroller::run(mat); - return res; - } -}; - -/*************************************************************************** -* Part 4 : implementation of MatrixBase methods -***************************************************************************/ - -/** \returns the sum of all coefficients of *this - * - * \sa trace(), prod() - */ -template -inline typename ei_traits::Scalar -MatrixBase::sum() const -{ - typedef typename ei_cleantype::type ThisNested; - return ei_sum_impl::run(derived()); -} - -/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. - * - * \c *this can be any matrix, not necessarily square. - * - * \sa diagonal(), sum() - */ -template -inline typename ei_traits::Scalar -MatrixBase::trace() const -{ - return diagonal().sum(); -} - -#endif // EIGEN_SUM_H diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 37eafa81f..b90a6fae1 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -177,7 +177,7 @@ template<> inline v4f ei_ploadu(const float* from) return (v4f) vec_perm(MSQ, LSQ, mask); // align the data } -template<> inline v4i ei_ploadu(const int* from) +template<> inline v4i ei_ploadu(const int* from) { // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html __vector unsigned char MSQ, LSQ; @@ -198,7 +198,7 @@ template<> inline v4f ei_pset1(const float& from) return vc; } -template<> inline v4i ei_pset1(const int& from) +template<> inline v4i ei_pset1(const int& from) { int __attribute__(aligned(16)) ai[4]; ai[0] = from; @@ -248,26 +248,28 @@ template<> inline void ei_pstoreu(int* to , const v4i& from ) template<> inline float ei_pfirst(const v4f& a) { - float __attribute__(aligned(16)) af[4]; + float EIGEN_ALIGN_128 af[4]; vec_st(a, 0, af); return af[0]; } template<> inline int ei_pfirst(const v4i& a) { - int __attribute__(aligned(16)) ai[4]; + int EIGEN_ALIGN_128 ai[4]; vec_st(a, 0, ai); return ai[0]; } template<> EIGEN_STRONG_INLINE v4f ei_preverse(const v4f& a) { - static const __vector unsigned char reverse_mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; + static const __vector unsigned char reverse_mask = + {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; return (v4f)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask); } template<> EIGEN_STRONG_INLINE v4i ei_preverse(const v4i& a) { - static const __vector unsigned char __attribute__(aligned(16)) reverse_mask = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; + static const __vector unsigned char __attribute__(aligned(16)) reverse_mask = + {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; return (v4i)vec_perm((__vector unsigned char)a,(__vector unsigned char)a,reverse_mask); } @@ -344,26 +346,59 @@ inline int ei_predux(const v4i& a) return ei_pfirst(sum); } +// implement other reductions operators inline float ei_predux_mul(const v4f& a) { - v4f b, sum; - b = (v4f)vec_sld(a, a, 8); - sum = ei_pmul(a, b); - b = (v4f)vec_sld(sum, sum, 4); - sum = ei_pmul(sum, b); - return ei_pfirst(sum); + v4f prod; + prod = ei_pmul(a, (v4f)vec_sld(a, a, 8)); + return ei_pfirst(ei_pmul(prod, (v4f)vec_sld(prod, prod, 4))); } inline int ei_predux_mul(const v4i& a) { - v4i b, sum; - b = (v4i)vec_sld(a, a, 8); - sum = ei_pmul(a, b); - b = (v4i)vec_sld(sum, sum, 4); - sum = ei_pmul(sum, b); - return ei_pfirst(sum); + EIGEN_ALIGN_128 int aux[4]; + ei_pstore(aux, a); + return aux[0] * aux[1] * aux[2] * aux[3]; } +inline float ei_predux_min(const v4f& a) +{ + EIGEN_ALIGN_128 float aux[4]; + ei_pstore(aux, a); + register float aux0 = aux[0]aux[1] ? aux[0] : aux[1]; + register float aux2 = aux[2]>aux[3] ? aux[2] : aux[3]; + return aux0>aux2 ? aux0 : aux2; +} + +inline int ei_predux_max(const v4i& a) +{ + EIGEN_ALIGN_128 int aux[4]; + ei_pstore(aux, a); + register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1]; + register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3]; + return aux0>aux2 ? aux0 : aux2; +} + + + template struct ei_palign_impl { diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 7bef79309..c2fbcd7cc 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -53,6 +53,7 @@ template<> EIGEN_STRONG_INLINE __m128 ei_pmul<__m128>(const __m128& a, const _ template<> EIGEN_STRONG_INLINE __m128d ei_pmul<__m128d>(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); } template<> EIGEN_STRONG_INLINE __m128i ei_pmul<__m128i>(const __m128i& a, const __m128i& b) { + // this version is very slightly faster than 4 scalar products return _mm_or_si128( _mm_and_si128( _mm_mul_epu32(a,b), @@ -76,18 +77,18 @@ template<> EIGEN_STRONG_INLINE __m128i ei_pmadd(const __m128i& a, const __m128i& template<> EIGEN_STRONG_INLINE __m128 ei_pmin<__m128>(const __m128& a, const __m128& b) { return _mm_min_ps(a,b); } template<> EIGEN_STRONG_INLINE __m128d ei_pmin<__m128d>(const __m128d& a, const __m128d& b) { return _mm_min_pd(a,b); } -// FIXME this vectorized min operator is likely to be slower than the standard one template<> EIGEN_STRONG_INLINE __m128i ei_pmin<__m128i>(const __m128i& a, const __m128i& b) { + // after some bench, this version *is* faster than a scalar implementation __m128i mask = _mm_cmplt_epi32(a,b); return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b)); } template<> EIGEN_STRONG_INLINE __m128 ei_pmax<__m128>(const __m128& a, const __m128& b) { return _mm_max_ps(a,b); } template<> EIGEN_STRONG_INLINE __m128d ei_pmax<__m128d>(const __m128d& a, const __m128d& b) { return _mm_max_pd(a,b); } -// FIXME this vectorized max operator is likely to be slower than the standard one template<> EIGEN_STRONG_INLINE __m128i ei_pmax<__m128i>(const __m128i& a, const __m128i& b) { + // after some bench, this version *is* faster than a scalar implementation __m128i mask = _mm_cmpgt_epi32(a,b); return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b)); } @@ -216,6 +217,7 @@ template<> EIGEN_STRONG_INLINE __m128i ei_preduxp<__m128i>(const __m128i* vecs) // Other reduction functions: +// mul template<> EIGEN_STRONG_INLINE float ei_predux_mul<__m128>(const __m128& a) { __m128 tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a)); @@ -227,8 +229,54 @@ template<> EIGEN_STRONG_INLINE double ei_predux_mul<__m128d>(const __m128d& a) } template<> EIGEN_STRONG_INLINE int ei_predux_mul<__m128i>(const __m128i& a) { - __m128i tmp = ei_pmul(a, _mm_unpackhi_epi64(a,a)); - return ei_pfirst(tmp) * ei_pfirst(_mm_shuffle_epi32(tmp, 1)); + // after some experiments, it is seems this is the fastest way to implement it + // for GCC (eg., reusing ei_pmul is very slow !) + // TODO try to call _mm_mul_epu32 directly + EIGEN_ALIGN_128 int aux[4]; + ei_pstore(aux, a); + return (aux[0] * aux[1]) * (aux[2] * aux[3]);; +} + +// min +template<> EIGEN_STRONG_INLINE float ei_predux_min<__m128>(const __m128& a) +{ + __m128 tmp = _mm_min_ps(a, _mm_movehl_ps(a,a)); + return ei_pfirst(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); +} +template<> EIGEN_STRONG_INLINE double ei_predux_min<__m128d>(const __m128d& a) +{ + return ei_pfirst(_mm_min_sd(a, _mm_unpackhi_pd(a,a))); +} +template<> EIGEN_STRONG_INLINE int ei_predux_min<__m128i>(const __m128i& a) +{ + // after some experiments, it is seems this is the fastest way to implement it + // for GCC (eg., it does not like using std::min after the ei_pstore !!) + EIGEN_ALIGN_128 int aux[4]; + ei_pstore(aux, a); + register int aux0 = aux[0] EIGEN_STRONG_INLINE float ei_predux_max<__m128>(const __m128& a) +{ + __m128 tmp = _mm_max_ps(a, _mm_movehl_ps(a,a)); + return ei_pfirst(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); +} +template<> EIGEN_STRONG_INLINE double ei_predux_max<__m128d>(const __m128d& a) +{ + return ei_pfirst(_mm_max_sd(a, _mm_unpackhi_pd(a,a))); +} +template<> EIGEN_STRONG_INLINE int ei_predux_max<__m128i>(const __m128i& a) +{ + // after some experiments, it is seems this is the fastest way to implement it + // for GCC (eg., it does not like using std::min after the ei_pstore !!) + EIGEN_ALIGN_128 int aux[4]; + ei_pstore(aux, a); + register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1]; + register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3]; + return aux0>aux2 ? aux0 : aux2; } #if (defined __GNUC__) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a20a6a12f..4046cf533 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -111,7 +111,7 @@ ei_add_test(vectorization_logic) ei_add_test(basicstuff) ei_add_test(linearstructure) ei_add_test(cwiseop) -ei_add_test(sum) +ei_add_test(redux) ei_add_test(product_small) ei_add_test(product_large ${EI_OFLAG}) ei_add_test(adjoint) @@ -149,7 +149,6 @@ ei_add_test(sparse_basic) ei_add_test(sparse_product) ei_add_test(sparse_solvers " " "${SPARSE_LIBS}") ei_add_test(reverse) -ei_add_test(prod) # print a summary of the different options message("************************************************************") diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 0397d9b28..f56532f91 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -129,6 +129,16 @@ template void packetmath() for (int i=0; i -// -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . - -#include "main.h" - -template void matrixProd(const MatrixType& m) -{ - typedef typename MatrixType::Scalar Scalar; - - int rows = m.rows(); - int cols = m.cols(); - - MatrixType m1 = MatrixType::Random(rows, cols); - - VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).prod(), Scalar(1)); - VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).prod(), Scalar(1)); - Scalar x = Scalar(1); - for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) x *= m1(i,j); - VERIFY_IS_APPROX(m1.prod(), x); -} - -template void vectorProd(const VectorType& w) -{ - typedef typename VectorType::Scalar Scalar; - int size = w.size(); - - VectorType v = VectorType::Random(size); - for(int i = 1; i < size; i++) - { - Scalar s = Scalar(1); - for(int j = 0; j < i; j++) s *= v[j]; - VERIFY_IS_APPROX(s, v.start(i).prod()); - } - - for(int i = 0; i < size-1; i++) - { - Scalar s = Scalar(1); - for(int j = i; j < size; j++) s *= v[j]; - VERIFY_IS_APPROX(s, v.end(size-i).prod()); - } - - for(int i = 0; i < size/2; i++) - { - Scalar s = Scalar(1); - for(int j = i; j < size-i; j++) s *= v[j]; - VERIFY_IS_APPROX(s, v.segment(i, size-2*i).prod()); - } -} - -void test_prod() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST( matrixProd(Matrix()) ); - CALL_SUBTEST( matrixProd(Matrix2f()) ); - CALL_SUBTEST( matrixProd(Matrix4d()) ); - CALL_SUBTEST( matrixProd(MatrixXcf(3, 3)) ); - CALL_SUBTEST( matrixProd(MatrixXf(8, 12)) ); - CALL_SUBTEST( matrixProd(MatrixXi(8, 12)) ); - } - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST( vectorProd(VectorXf(5)) ); - CALL_SUBTEST( vectorProd(VectorXd(10)) ); - CALL_SUBTEST( vectorProd(VectorXf(33)) ); - } -} diff --git a/test/redux.cpp b/test/redux.cpp new file mode 100644 index 000000000..dee9d4bbd --- /dev/null +++ b/test/redux.cpp @@ -0,0 +1,127 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2008 Benoit Jacob +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" + +template void matrixRedux(const MatrixType& m) +{ + typedef typename MatrixType::Scalar Scalar; + + int rows = m.rows(); + int cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols); + + VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1)); + VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).sum(), Scalar(float(rows*cols))); // the float() here to shut up excessive MSVC warning about int->complex conversion being lossy + Scalar s(0), p(1), minc(ei_real(m1.coeff(0))), maxc(ei_real(m1.coeff(0))); + for(int j = 0; j < cols; j++) + for(int i = 0; i < rows; i++) + { + s += m1(i,j); + p *= m1(i,j); + minc = std::min(ei_real(minc), ei_real(m1(i,j))); + maxc = std::max(ei_real(maxc), ei_real(m1(i,j))); + } + VERIFY_IS_APPROX(m1.sum(), s); + VERIFY_IS_APPROX(m1.prod(), p); + VERIFY_IS_APPROX(m1.real().minCoeff(), ei_real(minc)); + VERIFY_IS_APPROX(m1.real().maxCoeff(), ei_real(maxc)); +} + +template void vectorRedux(const VectorType& w) +{ + typedef typename VectorType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + int size = w.size(); + + VectorType v = VectorType::Random(size); + for(int i = 1; i < size; i++) + { + Scalar s(0), p(1); + RealScalar minc(ei_real(v.coeff(0))), maxc(ei_real(v.coeff(0))); + for(int j = 0; j < i; j++) + { + s += v[j]; + p *= v[j]; + minc = std::min(minc, ei_real(v[j])); + maxc = std::max(maxc, ei_real(v[j])); + } + VERIFY_IS_APPROX(s, v.start(i).sum()); + VERIFY_IS_APPROX(p, v.start(i).prod()); + VERIFY_IS_APPROX(minc, v.real().start(i).minCoeff()); + VERIFY_IS_APPROX(maxc, v.real().start(i).maxCoeff()); + } + + for(int i = 0; i < size-1; i++) + { + Scalar s(0), p(1); + RealScalar minc(ei_real(v.coeff(i))), maxc(ei_real(v.coeff(i))); + for(int j = i; j < size; j++) + { + s += v[j]; + p *= v[j]; + minc = std::min(minc, ei_real(v[j])); + maxc = std::max(maxc, ei_real(v[j])); + } + VERIFY_IS_APPROX(s, v.end(size-i).sum()); + VERIFY_IS_APPROX(p, v.end(size-i).prod()); + VERIFY_IS_APPROX(minc, v.real().end(size-i).minCoeff()); + VERIFY_IS_APPROX(maxc, v.real().end(size-i).maxCoeff()); + } + + for(int i = 0; i < size/2; i++) + { + Scalar s(0), p(1); + RealScalar minc(ei_real(v.coeff(i))), maxc(ei_real(v.coeff(i))); + for(int j = i; j < size-i; j++) + { + s += v[j]; + p *= v[j]; + minc = std::min(minc, ei_real(v[j])); + maxc = std::max(maxc, ei_real(v[j])); + } + VERIFY_IS_APPROX(s, v.segment(i, size-2*i).sum()); + VERIFY_IS_APPROX(p, v.segment(i, size-2*i).prod()); + VERIFY_IS_APPROX(minc, v.real().segment(i, size-2*i).minCoeff()); + VERIFY_IS_APPROX(maxc, v.real().segment(i, size-2*i).maxCoeff()); + } +} + +void test_redux() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST( matrixRedux(Matrix()) ); + CALL_SUBTEST( matrixRedux(Matrix2f()) ); + CALL_SUBTEST( matrixRedux(Matrix4d()) ); + CALL_SUBTEST( matrixRedux(MatrixXcf(3, 3)) ); + CALL_SUBTEST( matrixRedux(MatrixXd(8, 12)) ); + CALL_SUBTEST( matrixRedux(MatrixXi(8, 12)) ); + } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST( vectorRedux(VectorXf(5)) ); + CALL_SUBTEST( vectorRedux(VectorXd(10)) ); + CALL_SUBTEST( vectorRedux(VectorXf(33)) ); + } +} diff --git a/test/sum.cpp b/test/sum.cpp deleted file mode 100644 index fe707e9b2..000000000 --- a/test/sum.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Benoit Jacob -// -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . - -#include "main.h" - -template void matrixSum(const MatrixType& m) -{ - typedef typename MatrixType::Scalar Scalar; - - int rows = m.rows(); - int cols = m.cols(); - - MatrixType m1 = MatrixType::Random(rows, cols); - - VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1)); - VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).sum(), Scalar(float(rows*cols))); // the float() here to shut up excessive MSVC warning about int->complex conversion being lossy - Scalar x = Scalar(0); - for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) x += m1(i,j); - VERIFY_IS_APPROX(m1.sum(), x); -} - -template void vectorSum(const VectorType& w) -{ - typedef typename VectorType::Scalar Scalar; - int size = w.size(); - - VectorType v = VectorType::Random(size); - for(int i = 1; i < size; i++) - { - Scalar s = Scalar(0); - for(int j = 0; j < i; j++) s += v[j]; - VERIFY_IS_APPROX(s, v.start(i).sum()); - } - - for(int i = 0; i < size-1; i++) - { - Scalar s = Scalar(0); - for(int j = i; j < size; j++) s += v[j]; - VERIFY_IS_APPROX(s, v.end(size-i).sum()); - } - - for(int i = 0; i < size/2; i++) - { - Scalar s = Scalar(0); - for(int j = i; j < size-i; j++) s += v[j]; - VERIFY_IS_APPROX(s, v.segment(i, size-2*i).sum()); - } -} - -void test_sum() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST( matrixSum(Matrix()) ); - CALL_SUBTEST( matrixSum(Matrix2f()) ); - CALL_SUBTEST( matrixSum(Matrix4d()) ); - CALL_SUBTEST( matrixSum(MatrixXcf(3, 3)) ); - CALL_SUBTEST( matrixSum(MatrixXf(8, 12)) ); - CALL_SUBTEST( matrixSum(MatrixXi(8, 12)) ); - } - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST( vectorSum(VectorXf(5)) ); - CALL_SUBTEST( vectorSum(VectorXd(10)) ); - CALL_SUBTEST( vectorSum(VectorXf(33)) ); - } -}