* add ei_predux_mul internal function

* apply Ricard Marxer's prod() patch with fixes for the vectorized path
Gael Guennebaud 2009-02-10 18:06:05 +00:00
parent a0cc5fba0a
commit cbbc6d940b
12 changed files with 397 additions and 1 deletion
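For context, a minimal usage sketch of the API this patch introduces (illustrative only, not part of the diff; it just combines the MatrixBase::prod() and rowwise prod() calls shown in the snippets below):

#include <Eigen/Core>
#include <iostream>
using namespace Eigen;
using namespace std;

int main()
{
  Matrix3d m = Matrix3d::Random();
  cout << m.prod() << endl;           // product of all nine coefficients (new MatrixBase::prod())
  cout << m.rowwise().prod() << endl; // per-row products as a vector (new PartialRedux::prod())
  return 0;
}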


@@ -139,6 +139,7 @@ namespace Eigen {
 #include "src/Core/DiagonalMatrix.h"
 #include "src/Core/DiagonalCoeffs.h"
 #include "src/Core/Sum.h"
+#include "src/Core/Prod.h"
 #include "src/Core/Redux.h"
 #include "src/Core/Visitor.h"
 #include "src/Core/Fuzzy.h"


@@ -115,6 +115,8 @@ EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
 /** \internal */
 template <typename BinaryOp, typename Scalar>
@@ -258,6 +260,16 @@ template<typename ExpressionType, int Direction> class PartialRedux
 const PartialReduxExpr<ExpressionType, ei_member_count<int>, Direction> count() const
 { return _expression(); }
+/** \returns a row (or column) vector expression of the product
+ * of each column (or row) of the referenced expression.
+ *
+ * Example: \include PartialRedux_prod.cpp
+ * Output: \verbinclude PartialRedux_prod.out
+ *
+ * \sa MatrixBase::prod() */
+const typename ReturnType<ei_member_prod>::Type prod() const
+{ return _expression(); }
 /** \returns a matrix expression
  * where each column (or row) are reversed.


@@ -96,6 +96,10 @@ ei_preduxp(const Packet* vecs) { return vecs[0]; }
 template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux(const Packet& a)
 { return a; }
+/** \internal \returns the product of the elements of \a a*/
+template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_mul(const Packet& a)
+{ return a; }
 /** \internal \returns the reversed elements of \a a*/
 template<typename Packet> inline Packet ei_preverse(const Packet& a)
 { return a; }
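The generic overload above is the scalar fallback: a packet of a single element reduces to itself. For real SIMD packets the contract is the product of all lanes; a loop-based reference of what ei_predux_mul(ei_pload(data)) is expected to return, mirroring the check added to test/packetmath.cpp below (sketch, not Eigen code):

// reference semantics for ei_predux_mul over a packet of packetSize scalars
template<typename Scalar>
Scalar predux_mul_reference(const Scalar* data, int packetSize)
{
  Scalar res = data[0];
  for (int i = 1; i < packetSize; ++i)
    res *= data[i]; // multiply all lanes together
  return res;
}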


@@ -535,6 +535,8 @@ template<typename Derived> class MatrixBase
 Scalar sum() const;
 Scalar trace() const;
+Scalar prod() const;
 typename ei_traits<Derived>::Scalar minCoeff() const;
 typename ei_traits<Derived>::Scalar maxCoeff() const;

Eigen/src/Core/Prod.h (new file, 262 lines)

@@ -0,0 +1,262 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_PROD_H
#define EIGEN_PROD_H
/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/
template<typename Derived>
struct ei_prod_traits
{
private:
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
};
public:
enum {
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
&& (int(Derived::Flags)&LinearAccessBit)
? LinearVectorization
: NoVectorization
};
private:
enum {
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::MulCost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
};
public:
enum {
Unrolling = Cost <= UnrollingLimit
? CompleteUnrolling
: NoUnrolling
};
};
/***************************************************************************
* Part 2 : unrollers
***************************************************************************/
/*** no vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_prod_novec_unroller
{
enum {
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return ei_prod_novec_unroller<Derived, Start, HalfLength>::run(mat)
* ei_prod_novec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat);
}
};
template<typename Derived, int Start>
struct ei_prod_novec_unroller<Derived, Start, 1>
{
enum {
col = Start / Derived::RowsAtCompileTime,
row = Start % Derived::RowsAtCompileTime
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return mat.coeff(row, col);
}
};
/*** vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_prod_vec_unroller
{
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return ei_pmul(
ei_prod_vec_unroller<Derived, Start, HalfLength>::run(mat),
ei_prod_vec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat) );
}
};
template<typename Derived, int Start>
struct ei_prod_vec_unroller<Derived, Start, 1>
{
enum {
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
row = int(Derived::Flags)&RowMajorBit
? index / int(Derived::ColsAtCompileTime)
: index % Derived::RowsAtCompileTime,
col = int(Derived::Flags)&RowMajorBit
? index % int(Derived::ColsAtCompileTime)
: index / Derived::RowsAtCompileTime,
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return mat.template packet<alignment>(row, col);
}
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
template<typename Derived,
int Vectorization = ei_prod_traits<Derived>::Vectorization,
int Unrolling = ei_prod_traits<Derived>::Unrolling
>
struct ei_prod_impl;
template<typename Derived>
struct ei_prod_impl<Derived, NoVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived& mat)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
Scalar res;
res = mat.coeff(0, 0);
for(int i = 1; i < mat.rows(); ++i)
res *= mat.coeff(i, 0);
for(int j = 1; j < mat.cols(); ++j)
for(int i = 0; i < mat.rows(); ++i)
res *= mat.coeff(i, j);
return res;
}
};
template<typename Derived>
struct ei_prod_impl<Derived, NoVectorization, CompleteUnrolling>
: public ei_prod_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
{};
template<typename Derived>
struct ei_prod_impl<Derived, LinearVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
static Scalar run(const Derived& mat)
{
const int size = mat.size();
const int packetSize = ei_packet_traits<Scalar>::size;
const int alignedStart = (Derived::Flags & AlignedBit)
|| !(Derived::Flags & DirectAccessBit)
? 0
: ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
enum {
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const int alignedSize = ((size-alignedStart)/packetSize)*packetSize;
const int alignedEnd = alignedStart + alignedSize;
Scalar res;
if(alignedSize)
{
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
packet_res = ei_pmul(packet_res, mat.template packet<alignment>(index));
res = ei_predux_mul(packet_res);
}
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
res = Scalar(1);
}
for(int index = 0; index < alignedStart; ++index)
res *= mat.coeff(index);
for(int index = alignedEnd; index < size; ++index)
res *= mat.coeff(index);
return res;
}
};
template<typename Derived>
struct ei_prod_impl<Derived, LinearVectorization, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime,
VectorizationSize = (Size / PacketSize) * PacketSize
};
static Scalar run(const Derived& mat)
{
Scalar res = ei_predux_mul(ei_prod_vec_unroller<Derived, 0, Size / PacketSize>::run(mat));
if (VectorizationSize != Size)
res *= ei_prod_novec_unroller<Derived, VectorizationSize, Size-VectorizationSize>::run(mat);
return res;
}
};
/***************************************************************************
* Part 4 : implementation of MatrixBase methods
***************************************************************************/
/** \returns the product of all coefficients of *this
*
* Example: \include MatrixBase_prod.cpp
* Output: \verbinclude MatrixBase_prod.out
*
* \sa sum()
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::prod() const
{
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
return ei_prod_impl<ThisNested>::run(derived());
}
#endif // EIGEN_PROD_H
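For intuition on Part 2 above: the no-vectorization unroller computes the product by compile-time recursive halving of the [Start, Start+Length) coefficient range, so the multiply tree is fully unrolled and balanced. A standalone sketch of the same pattern over a plain array (hypothetical names, not part of the patch):

// compile-time divide-and-conquer product over data[Start .. Start+Length),
// mirroring the Start/Length recursion of ei_prod_novec_unroller
template<typename Scalar, int Start, int Length>
struct prod_unroller_sketch
{
  enum { HalfLength = Length/2 };
  static Scalar run(const Scalar* data)
  {
    return prod_unroller_sketch<Scalar, Start, HalfLength>::run(data)
         * prod_unroller_sketch<Scalar, Start+HalfLength, Length-HalfLength>::run(data);
  }
};

// base case: a single coefficient
template<typename Scalar, int Start>
struct prod_unroller_sketch<Scalar, Start, 1>
{
  static Scalar run(const Scalar* data) { return data[Start]; }
};

// usage: for double d[4] = {1, 2, 3, 4}, prod_unroller_sketch<double, 0, 4>::run(d)
// yields 24, with the four loads and three multiplies emitted inline.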


@@ -246,7 +246,7 @@ struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
 /** \returns the sum of all coefficients of *this
 *
-* \sa trace()
+* \sa trace(), prod()
 */
 template<typename Derived>
 inline typename ei_traits<Derived>::Scalar


@@ -214,6 +214,23 @@ template<> EIGEN_STRONG_INLINE __m128i ei_preduxp<__m128i>(const __m128i* vecs)
 return _mm_add_epi32(tmp0, tmp2);
 }
+// Other reduction functions:
+template<> EIGEN_STRONG_INLINE float ei_predux_mul<__m128>(const __m128& a)
+{
+  __m128 tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+  return ei_pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double ei_predux_mul<__m128d>(const __m128d& a)
+{
+  return ei_pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int ei_predux_mul<__m128i>(const __m128i& a)
+{
+  __m128i tmp = ei_pmul(a, _mm_unpackhi_epi64(a,a));
+  return ei_pfirst(tmp) * ei_pfirst(_mm_shuffle_epi32(tmp, 1));
+}
 #if (defined __GNUC__)
 // template <> EIGEN_STRONG_INLINE __m128 ei_pmadd(const __m128& a, const __m128& b, const __m128& c)
 // {
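As a sanity check on the float specialization above, the same reduction tree can be written with raw SSE intrinsics outside Eigen (standalone sketch; assumes an SSE-capable compiler and uses _mm_cvtss_f32 where the Eigen code uses ei_pfirst):

#include <xmmintrin.h>
#include <cstdio>

// product of the four lanes of an __m128, same shuffle sequence as ei_predux_mul<__m128>
static float predux_mul_sse(__m128 a)
{
  __m128 tmp = _mm_mul_ps(a, _mm_movehl_ps(a, a));           // {a0*a2, a1*a3, ...}
  __m128 r   = _mm_mul_ss(tmp, _mm_shuffle_ps(tmp, tmp, 1)); // lane 0 = (a0*a2)*(a1*a3)
  return _mm_cvtss_f32(r);                                   // extract lane 0
}

int main()
{
  __m128 v = _mm_set_ps(4.f, 3.f, 2.f, 1.f); // lanes are {1, 2, 3, 4}
  std::printf("%g\n", predux_mul_sse(v));    // prints 24
  return 0;
}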


MatrixBase_prod.cpp (new file, 3 lines)
@@ -0,0 +1,3 @@
Matrix3d m = Matrix3d::Random();
cout << "Here is the matrix m:" << endl << m << endl;
cout << "Here is the product of all the coefficients:" << endl << m.prod() << endl;


PartialRedux_prod.cpp (new file, 3 lines)
@@ -0,0 +1,3 @@
Matrix3d m = Matrix3d::Random();
cout << "Here is the matrix m:" << endl << m << endl;
cout << "Here is the product of each row:" << endl << m.rowwise().prod() << endl;


@@ -149,6 +149,7 @@ ei_add_test(sparse_basic)
 ei_add_test(sparse_product)
 ei_add_test(sparse_solvers " " "${SPARSE_LIBS}")
 ei_add_test(reverse)
+ei_add_test(prod)
 # print a summary of the different options
 message("************************************************************")


@@ -125,6 +125,11 @@ template<typename Scalar> void packetmath()
 ref[0] += data1[i];
 VERIFY(ei_isApprox(ref[0], ei_predux(ei_pload(data1))) && "ei_predux");
+ref[0] = 1;
+for (int i=0; i<PacketSize; ++i)
+  ref[0] *= data1[i];
+VERIFY(ei_isApprox(ref[0], ei_predux_mul(ei_pload(data1))) && "ei_predux_mul");
 for (int j=0; j<PacketSize; ++j)
 {
 ref[j] = 0;

test/prod.cpp (new file, 86 lines)

@@ -0,0 +1,86 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#include "main.h"
template<typename MatrixType> void matrixProd(const MatrixType& m)
{
typedef typename MatrixType::Scalar Scalar;
int rows = m.rows();
int cols = m.cols();
MatrixType m1 = MatrixType::Random(rows, cols);
VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).prod(), Scalar(1));
VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).prod(), Scalar(1));
Scalar x = Scalar(1);
for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) x *= m1(i,j);
VERIFY_IS_APPROX(m1.prod(), x);
}
template<typename VectorType> void vectorProd(const VectorType& w)
{
typedef typename VectorType::Scalar Scalar;
int size = w.size();
VectorType v = VectorType::Random(size);
for(int i = 1; i < size; i++)
{
Scalar s = Scalar(1);
for(int j = 0; j < i; j++) s *= v[j];
VERIFY_IS_APPROX(s, v.start(i).prod());
}
for(int i = 0; i < size-1; i++)
{
Scalar s = Scalar(1);
for(int j = i; j < size; j++) s *= v[j];
VERIFY_IS_APPROX(s, v.end(size-i).prod());
}
for(int i = 0; i < size/2; i++)
{
Scalar s = Scalar(1);
for(int j = i; j < size-i; j++) s *= v[j];
VERIFY_IS_APPROX(s, v.segment(i, size-2*i).prod());
}
}
void test_prod()
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( matrixProd(Matrix<float, 1, 1>()) );
CALL_SUBTEST( matrixProd(Matrix2f()) );
CALL_SUBTEST( matrixProd(Matrix4d()) );
CALL_SUBTEST( matrixProd(MatrixXcf(3, 3)) );
CALL_SUBTEST( matrixProd(MatrixXf(8, 12)) );
CALL_SUBTEST( matrixProd(MatrixXi(8, 12)) );
}
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( vectorProd(VectorXf(5)) );
CALL_SUBTEST( vectorProd(VectorXd(10)) );
CALL_SUBTEST( vectorProd(VectorXf(33)) );
}
}