* add ei_predux_mul internal function

* apply Ricard Marxer's prod() patch with fixes for the vectorized path
Gael Guennebaud 2009-02-10 18:06:05 +00:00
parent a0cc5fba0a
commit cbbc6d940b
12 changed files with 397 additions and 1 deletion
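For context, a minimal usage sketch of the API this patch introduces (illustrative only, not part of the diff; it just combines the MatrixBase::prod() and rowwise prod() calls shown in the snippets below):

#include <Eigen/Core>
#include <iostream>
using namespace Eigen;
using namespace std;

int main()
{
  Matrix3d m = Matrix3d::Random();
  cout << m.prod() << endl;           // product of all nine coefficients (new MatrixBase::prod())
  cout << m.rowwise().prod() << endl; // per-row products as a vector (new PartialRedux::prod())
  return 0;
}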


@@ -139,6 +139,7 @@ namespace Eigen {
 #include "src/Core/DiagonalMatrix.h"
 #include "src/Core/DiagonalCoeffs.h"
 #include "src/Core/Sum.h"
+#include "src/Core/Prod.h"
 #include "src/Core/Redux.h"
 #include "src/Core/Visitor.h"
 #include "src/Core/Fuzzy.h"


@@ -115,6 +115,8 @@ EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
 EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
 /** \internal */
 template <typename BinaryOp, typename Scalar>
@@ -258,6 +260,16 @@ template<typename ExpressionType, int Direction> class PartialRedux
 const PartialReduxExpr<ExpressionType, ei_member_count<int>, Direction> count() const
 { return _expression(); }
+/** \returns a row (or column) vector expression of the product
+ * of each column (or row) of the referenced expression.
+ *
+ * Example: \include PartialRedux_prod.cpp
+ * Output: \verbinclude PartialRedux_prod.out
+ *
+ * \sa MatrixBase::prod() */
+const typename ReturnType<ei_member_prod>::Type prod() const
+{ return _expression(); }
 /** \returns a matrix expression
  * where each column (or row) are reversed.


@@ -96,6 +96,10 @@ ei_preduxp(const Packet* vecs) { return vecs[0]; }
 template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux(const Packet& a)
 { return a; }
+/** \internal \returns the product of the elements of \a a*/
+template<typename Packet> inline typename ei_unpacket_traits<Packet>::type ei_predux_mul(const Packet& a)
+{ return a; }
 /** \internal \returns the reversed elements of \a a*/
 template<typename Packet> inline Packet ei_preverse(const Packet& a)
 { return a; }
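The generic overload above is the scalar fallback: a packet of a single element reduces to itself. For real SIMD packets the contract is the product of all lanes; a loop-based reference of what ei_predux_mul(ei_pload(data)) is expected to return, mirroring the check added to test/packetmath.cpp below (sketch, not Eigen code):

// reference semantics for ei_predux_mul over a packet of packetSize scalars
template<typename Scalar>
Scalar predux_mul_reference(const Scalar* data, int packetSize)
{
  Scalar res = data[0];
  for (int i = 1; i < packetSize; ++i)
    res *= data[i]; // multiply all lanes together
  return res;
}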


@@ -535,6 +535,8 @@ template<typename Derived> class MatrixBase
 Scalar sum() const;
 Scalar trace() const;
+Scalar prod() const;
 typename ei_traits<Derived>::Scalar minCoeff() const;
 typename ei_traits<Derived>::Scalar maxCoeff() const;

Eigen/src/Core/Prod.h (new file, 262 lines)

@@ -0,0 +1,262 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_PROD_H
#define EIGEN_PROD_H
/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/
template<typename Derived>
struct ei_prod_traits
{
private:
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
};
public:
enum {
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
&& (int(Derived::Flags)&LinearAccessBit)
? LinearVectorization
: NoVectorization
};
private:
enum {
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::MulCost,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
};
public:
enum {
Unrolling = Cost <= UnrollingLimit
? CompleteUnrolling
: NoUnrolling
};
};
/***************************************************************************
* Part 2 : unrollers
***************************************************************************/
/*** no vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_prod_novec_unroller
{
enum {
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return ei_prod_novec_unroller<Derived, Start, HalfLength>::run(mat)
* ei_prod_novec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat);
}
};
template<typename Derived, int Start>
struct ei_prod_novec_unroller<Derived, Start, 1>
{
enum {
col = Start / Derived::RowsAtCompileTime,
row = Start % Derived::RowsAtCompileTime
};
typedef typename Derived::Scalar Scalar;
inline static Scalar run(const Derived &mat)
{
return mat.coeff(row, col);
}
};
/*** vectorization ***/
template<typename Derived, int Start, int Length>
struct ei_prod_vec_unroller
{
enum {
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
HalfLength = Length/2
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return ei_pmul(
ei_prod_vec_unroller<Derived, Start, HalfLength>::run(mat),
ei_prod_vec_unroller<Derived, Start+HalfLength, Length-HalfLength>::run(mat) );
}
};
template<typename Derived, int Start>
struct ei_prod_vec_unroller<Derived, Start, 1>
{
enum {
index = Start * ei_packet_traits<typename Derived::Scalar>::size,
row = int(Derived::Flags)&RowMajorBit
? index / int(Derived::ColsAtCompileTime)
: index % Derived::RowsAtCompileTime,
col = int(Derived::Flags)&RowMajorBit
? index % int(Derived::ColsAtCompileTime)
: index / Derived::RowsAtCompileTime,
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
};
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
inline static PacketScalar run(const Derived &mat)
{
return mat.template packet<alignment>(row, col);
}
};
/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/
template<typename Derived,
int Vectorization = ei_prod_traits<Derived>::Vectorization,
int Unrolling = ei_prod_traits<Derived>::Unrolling
>
struct ei_prod_impl;
template<typename Derived>
struct ei_prod_impl<Derived, NoVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
static Scalar run(const Derived& mat)
{
ei_assert(mat.rows()>0 && mat.cols()>0 && "you are using a non initialized matrix");
Scalar res;
res = mat.coeff(0, 0);
for(int i = 1; i < mat.rows(); ++i)
res *= mat.coeff(i, 0);
for(int j = 1; j < mat.cols(); ++j)
for(int i = 0; i < mat.rows(); ++i)
res *= mat.coeff(i, j);
return res;
}
};
template<typename Derived>
struct ei_prod_impl<Derived, NoVectorization, CompleteUnrolling>
: public ei_prod_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
{};
template<typename Derived>
struct ei_prod_impl<Derived, LinearVectorization, NoUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
static Scalar run(const Derived& mat)
{
const int size = mat.size();
const int packetSize = ei_packet_traits<Scalar>::size;
const int alignedStart = (Derived::Flags & AlignedBit)
|| !(Derived::Flags & DirectAccessBit)
? 0
: ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
enum {
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
? Aligned : Unaligned
};
const int alignedSize = ((size-alignedStart)/packetSize)*packetSize;
const int alignedEnd = alignedStart + alignedSize;
Scalar res;
if(alignedSize)
{
PacketScalar packet_res = mat.template packet<alignment>(alignedStart);
for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
packet_res = ei_pmul(packet_res, mat.template packet<alignment>(index));
res = ei_predux_mul(packet_res);
}
else // too small to vectorize anything.
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
{
res = Scalar(1);
}
for(int index = 0; index < alignedStart; ++index)
res *= mat.coeff(index);
for(int index = alignedEnd; index < size; ++index)
res *= mat.coeff(index);
return res;
}
};
template<typename Derived>
struct ei_prod_impl<Derived, LinearVectorization, CompleteUnrolling>
{
typedef typename Derived::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
Size = Derived::SizeAtCompileTime,
VectorizationSize = (Size / PacketSize) * PacketSize
};
static Scalar run(const Derived& mat)
{
Scalar res = ei_predux_mul(ei_prod_vec_unroller<Derived, 0, Size / PacketSize>::run(mat));
if (VectorizationSize != Size)
res *= ei_prod_novec_unroller<Derived, VectorizationSize, Size-VectorizationSize>::run(mat);
return res;
}
};
/***************************************************************************
* Part 4 : implementation of MatrixBase methods
***************************************************************************/
/** \returns the product of all coefficients of *this
*
* Example: \include MatrixBase_prod.cpp
* Output: \verbinclude MatrixBase_prod.out
*
* \sa sum()
*/
template<typename Derived>
inline typename ei_traits<Derived>::Scalar
MatrixBase<Derived>::prod() const
{
typedef typename ei_cleantype<typename Derived::Nested>::type ThisNested;
return ei_prod_impl<ThisNested>::run(derived());
}
#endif // EIGEN_PROD_H
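For intuition on Part 2 above: the no-vectorization unroller computes the product by compile-time recursive halving of the [Start, Start+Length) coefficient range, so the multiply tree is fully unrolled and balanced. A standalone sketch of the same pattern over a plain array (hypothetical names, not part of the patch):

// compile-time divide-and-conquer product over data[Start .. Start+Length),
// mirroring the Start/Length recursion of ei_prod_novec_unroller
template<typename Scalar, int Start, int Length>
struct prod_unroller_sketch
{
  enum { HalfLength = Length/2 };
  static Scalar run(const Scalar* data)
  {
    return prod_unroller_sketch<Scalar, Start, HalfLength>::run(data)
         * prod_unroller_sketch<Scalar, Start+HalfLength, Length-HalfLength>::run(data);
  }
};

// base case: a single coefficient
template<typename Scalar, int Start>
struct prod_unroller_sketch<Scalar, Start, 1>
{
  static Scalar run(const Scalar* data) { return data[Start]; }
};

// usage: for double d[4] = {1, 2, 3, 4}, prod_unroller_sketch<double, 0, 4>::run(d)
// yields 24, with the four loads and three multiplies emitted inline.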


@@ -246,7 +246,7 @@ struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
 /** \returns the sum of all coefficients of *this
 *
-* \sa trace()
+* \sa trace(), prod()
 */
 template<typename Derived>
 inline typename ei_traits<Derived>::Scalar


@@ -214,6 +214,23 @@ template<> EIGEN_STRONG_INLINE __m128i ei_preduxp<__m128i>(const __m128i* vecs)
 return _mm_add_epi32(tmp0, tmp2);
 }
+// Other reduction functions:
+template<> EIGEN_STRONG_INLINE float ei_predux_mul<__m128>(const __m128& a)
+{
+  __m128 tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+  return ei_pfirst(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double ei_predux_mul<__m128d>(const __m128d& a)
+{
+  return ei_pfirst(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int ei_predux_mul<__m128i>(const __m128i& a)
+{
+  __m128i tmp = ei_pmul(a, _mm_unpackhi_epi64(a,a));
+  return ei_pfirst(tmp) * ei_pfirst(_mm_shuffle_epi32(tmp, 1));
+}
 #if (defined __GNUC__)
 // template <> EIGEN_STRONG_INLINE __m128 ei_pmadd(const __m128& a, const __m128& b, const __m128& c)
 // {
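As a sanity check on the float specialization above, the same reduction tree can be written with raw SSE intrinsics outside Eigen (standalone sketch; assumes an SSE-capable compiler and uses _mm_cvtss_f32 where the Eigen code uses ei_pfirst):

#include <xmmintrin.h>
#include <cstdio>

// product of the four lanes of an __m128, same shuffle sequence as ei_predux_mul<__m128>
static float predux_mul_sse(__m128 a)
{
  __m128 tmp = _mm_mul_ps(a, _mm_movehl_ps(a, a));           // {a0*a2, a1*a3, ...}
  __m128 r   = _mm_mul_ss(tmp, _mm_shuffle_ps(tmp, tmp, 1)); // lane 0 = (a0*a2)*(a1*a3)
  return _mm_cvtss_f32(r);                                   // extract lane 0
}

int main()
{
  __m128 v = _mm_set_ps(4.f, 3.f, 2.f, 1.f); // lanes are {1, 2, 3, 4}
  std::printf("%g\n", predux_mul_sse(v));    // prints 24
  return 0;
}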


MatrixBase_prod.cpp (new file, 3 lines)
@@ -0,0 +1,3 @@
Matrix3d m = Matrix3d::Random();
cout << "Here is the matrix m:" << endl << m << endl;
cout << "Here is the product of all the coefficients:" << endl << m.prod() << endl;


PartialRedux_prod.cpp (new file, 3 lines)
@@ -0,0 +1,3 @@
Matrix3d m = Matrix3d::Random();
cout << "Here is the matrix m:" << endl << m << endl;
cout << "Here is the product of each row:" << endl << m.rowwise().prod() << endl;


@@ -149,6 +149,7 @@ ei_add_test(sparse_basic)
 ei_add_test(sparse_product)
 ei_add_test(sparse_solvers " " "${SPARSE_LIBS}")
 ei_add_test(reverse)
+ei_add_test(prod)
 # print a summary of the different options
 message("************************************************************")


@@ -125,6 +125,11 @@ template<typename Scalar> void packetmath()
 ref[0] += data1[i];
 VERIFY(ei_isApprox(ref[0], ei_predux(ei_pload(data1))) && "ei_predux");
+ref[0] = 1;
+for (int i=0; i<PacketSize; ++i)
+  ref[0] *= data1[i];
+VERIFY(ei_isApprox(ref[0], ei_predux_mul(ei_pload(data1))) && "ei_predux_mul");
 for (int j=0; j<PacketSize; ++j)
 {
 ref[j] = 0;

test/prod.cpp (new file, 86 lines)

@@ -0,0 +1,86 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#include "main.h"
template<typename MatrixType> void matrixProd(const MatrixType& m)
{
typedef typename MatrixType::Scalar Scalar;
int rows = m.rows();
int cols = m.cols();
MatrixType m1 = MatrixType::Random(rows, cols);
VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).prod(), Scalar(1));
VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).prod(), Scalar(1));
Scalar x = Scalar(1);
for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) x *= m1(i,j);
VERIFY_IS_APPROX(m1.prod(), x);
}
template<typename VectorType> void vectorProd(const VectorType& w)
{
typedef typename VectorType::Scalar Scalar;
int size = w.size();
VectorType v = VectorType::Random(size);
for(int i = 1; i < size; i++)
{
Scalar s = Scalar(1);
for(int j = 0; j < i; j++) s *= v[j];
VERIFY_IS_APPROX(s, v.start(i).prod());
}
for(int i = 0; i < size-1; i++)
{
Scalar s = Scalar(1);
for(int j = i; j < size; j++) s *= v[j];
VERIFY_IS_APPROX(s, v.end(size-i).prod());
}
for(int i = 0; i < size/2; i++)
{
Scalar s = Scalar(1);
for(int j = i; j < size-i; j++) s *= v[j];
VERIFY_IS_APPROX(s, v.segment(i, size-2*i).prod());
}
}
void test_prod()
{
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( matrixProd(Matrix<float, 1, 1>()) );
CALL_SUBTEST( matrixProd(Matrix2f()) );
CALL_SUBTEST( matrixProd(Matrix4d()) );
CALL_SUBTEST( matrixProd(MatrixXcf(3, 3)) );
CALL_SUBTEST( matrixProd(MatrixXf(8, 12)) );
CALL_SUBTEST( matrixProd(MatrixXi(8, 12)) );
}
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST( vectorProd(VectorXf(5)) );
CALL_SUBTEST( vectorProd(VectorXd(10)) );
CALL_SUBTEST( vectorProd(VectorXf(33)) );
}
}