fine tuning in dot() and sum(), and prepare for the sparse versions...

2025-09-25 07:43:14 +08:00 · 2009-01-07 16:58:17 +00:00 · 2009-01-07 16:58:17 +00:00 · 336f0a8486
commit 336f0a8486
parent 6b9d647fc2
3 changed files with 19 additions and 23 deletions
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -143,12 +143,13 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>

 template<typename Derived1, typename Derived2,
         int Vectorization = ei_dot_traits<Derived1, Derived2>::Vectorization,
-         int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling
+         int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling,
+         int Storage = (ei_traits<Derived1>::Flags | ei_traits<Derived2>::Flags) & SparseBit
 >
 struct ei_dot_impl;

 template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
+struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling, IsDense>
 {
  typedef typename Derived1::Scalar Scalar;
  static Scalar run(const Derived1& v1, const Derived2& v2)
@@ -163,12 +164,12 @@ struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
 };

 template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
+struct ei_dot_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling, IsDense>
  : public ei_dot_novec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
 {};

 template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
+struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling, IsDense>
 {
  typedef typename Derived1::Scalar Scalar;
  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -221,7 +222,7 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
 };

 template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
+struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling, IsDense>
 {
  typedef typename Derived1::Scalar Scalar;
  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -258,20 +259,15 @@ template<typename OtherDerived>
 typename ei_traits<Derived>::Scalar
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
-  typedef typename Derived::Nested Nested;
-  typedef typename OtherDerived::Nested OtherNested;
-  typedef typename ei_unref<Nested>::type _Nested;
-  typedef typename ei_unref<OtherNested>::type _OtherNested;
-
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(_Nested)
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested)
-  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested)
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
  EIGEN_STATIC_ASSERT((ei_is_same_type<Scalar, typename OtherDerived::Scalar>::ret),
    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)

  ei_assert(size() == other.size());

-  return ei_dot_impl<_Nested, _OtherNested>::run(derived(), other.derived());
+  return ei_dot_impl<Derived, OtherDerived>::run(derived(), other.derived());
 }

 /** \returns the squared norm of *this, i.e. the dot product of *this with itself.
@@ -287,7 +283,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 template<typename Derived>
 EIGEN_DEPRECATED inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm2() const
 {
-  return ei_real(dot(*this));
+  return ei_real((*this).cwise().abs2().sum());
 }

 /** \returns the squared norm of *this, i.e. the dot product of *this with itself.
@@ -299,7 +295,7 @@ EIGEN_DEPRECATED inline typename NumTraits<typename ei_traits<Derived>::Scalar>:
 template<typename Derived>
 inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
 {
-  return ei_real(dot(*this));
+  return ei_real((*this).cwise().abs2().sum());
 }

 /** \returns the \em l2 norm of *this, i.e. the square root of the dot product of *this with itself.
--- a/Eigen/src/Core/Sum.h
+++ b/Eigen/src/Core/Sum.h
@@ -42,7 +42,6 @@ public:
  enum {
    Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
                 && (int(Derived::Flags)&LinearAccessBit)
-                 && (int(Derived::SizeAtCompileTime)>2*PacketSize)
                  ? LinearVectorization
                  : NoVectorization
  };
@@ -155,12 +154,13 @@ struct ei_sum_vec_unroller<Derived, Index, Stop, true>

 template<typename Derived,
         int Vectorization = ei_sum_traits<Derived>::Vectorization,
-         int Unrolling = ei_sum_traits<Derived>::Unrolling
+         int Unrolling = ei_sum_traits<Derived>::Unrolling,
+         int Storage = ei_traits<Derived>::Flags & SparseBit
 >
 struct ei_sum_impl;

 template<typename Derived>
-struct ei_sum_impl<Derived, NoVectorization, NoUnrolling>
+struct ei_sum_impl<Derived, NoVectorization, NoUnrolling, IsDense>
 {
  typedef typename Derived::Scalar Scalar;
  static Scalar run(const Derived& mat)
@@ -178,12 +178,12 @@ struct ei_sum_impl<Derived, NoVectorization, NoUnrolling>
 };

 template<typename Derived>
-struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling>
+struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling, IsDense>
  : public ei_sum_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
 {};

 template<typename Derived>
-struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
+struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling,IsDense>
 {
  typedef typename Derived::Scalar Scalar;
  typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -228,7 +228,7 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
 };

 template<typename Derived>
-struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
+struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling, IsDense>
 {
  typedef typename Derived::Scalar Scalar;
  static Scalar run(const Derived& mat)
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -73,7 +73,7 @@ inline T* ei_aligned_malloc(size_t size)
    // and this type has a custom operator new, then we want to honor this operator new!
    // so when we use C functions to allocate memory, we must be careful to call manually the constructor using
    // the special placement-new syntax.
-    return new(void_result) T[size];
+    return ::new(void_result) T[size];
  }
  else
    return new T[size]; // here we really want a new, not a malloc. Justification: if the user uses Eigen on