From d9f638049994b90ed388c68c8a0ab7efc2e5615c Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sat, 27 Feb 2010 10:03:27 -0500 Subject: [PATCH] Remove the dot product's separate implementation and use cwiseProduct.sum instead. Also take special care to get nicely working static assertions. --- Eigen/src/Core/Dot.h | 227 ++------------------ Eigen/src/Core/products/CoeffBasedProduct.h | 20 +- 2 files changed, 18 insertions(+), 229 deletions(-) diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 201bd23ca..72f6c571d 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2006-2008, 2010 Benoit Jacob // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -25,224 +25,28 @@ #ifndef EIGEN_DOT_H #define EIGEN_DOT_H -/*************************************************************************** -* Part 1 : the logic deciding a strategy for vectorization and unrolling -***************************************************************************/ - -template -struct ei_dot_traits +// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot +// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE +// looking at the static assertions. Thus this is a trick to get better compile errors. +template::ret> +struct ei_dot_nocheck { -public: - enum { - Traversal = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit) - && (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit) - ? LinearVectorizedTraversal - : DefaultTraversal - }; - -private: - typedef typename Derived1::Scalar Scalar; - enum { - PacketSize = ei_packet_traits::size, - Cost = Derived1::SizeAtCompileTime * (Derived1::CoeffReadCost + Derived2::CoeffReadCost + NumTraits::MulCost) - + (Derived1::SizeAtCompileTime-1) * NumTraits::AddCost, - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) - }; - -public: - enum { - Unrolling = Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling - }; -}; - -/*************************************************************************** -* Part 2 : unrollers -***************************************************************************/ - -/*** no vectorization ***/ - -template -struct ei_dot_novec_unroller -{ - enum { - HalfLength = Length/2 - }; - - typedef typename Derived1::Scalar Scalar; - - inline static Scalar run(const Derived1& v1, const Derived2& v2) + static inline typename ei_traits::Scalar run(const MatrixBase& a, const MatrixBase& b) { - return ei_dot_novec_unroller::run(v1, v2) - + ei_dot_novec_unroller::run(v1, v2); + return a.conjugate().cwiseProduct(b).sum(); } }; -template -struct ei_dot_novec_unroller +template +struct ei_dot_nocheck { - typedef typename Derived1::Scalar Scalar; - - inline static Scalar run(const Derived1& v1, const Derived2& v2) + static inline typename ei_traits::Scalar run(const MatrixBase&, const MatrixBase&) { - return ei_conj(v1.coeff(Start)) * v2.coeff(Start); + return typename ei_traits::Scalar(0); } }; -/*** vectorization ***/ - -template::size)> -struct ei_dot_vec_unroller -{ - typedef typename Derived1::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - enum { - row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index, - col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0, - row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index, - col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0 - }; - - inline static PacketScalar run(const Derived1& v1, const Derived2& v2) - { - return ei_pmadd( - v1.template packet(row1, col1), - v2.template packet(row2, col2), - ei_dot_vec_unroller::size, Stop>::run(v1, v2) - ); - } -}; - -template -struct ei_dot_vec_unroller -{ - enum { - row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index, - col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0, - row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index, - col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0, - alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned, - alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned - }; - - typedef typename Derived1::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - inline static PacketScalar run(const Derived1& v1, const Derived2& v2) - { - return ei_pmul(v1.template packet(row1, col1), v2.template packet(row2, col2)); - } -}; - -/*************************************************************************** -* Part 3 : implementation of all cases -***************************************************************************/ - -template::Traversal, - int Unrolling = ei_dot_traits::Unrolling -> -struct ei_dot_impl; - -template -struct ei_dot_impl -{ - typedef typename Derived1::Scalar Scalar; - static Scalar run(const Derived1& v1, const Derived2& v2) - { - ei_assert(v1.size()>0 && "you are using a non initialized vector"); - Scalar res; - res = ei_conj(v1.coeff(0)) * v2.coeff(0); - for(int i = 1; i < v1.size(); ++i) - res += ei_conj(v1.coeff(i)) * v2.coeff(i); - return res; - } -}; - -template -struct ei_dot_impl - : public ei_dot_novec_unroller -{}; - -template -struct ei_dot_impl -{ - typedef typename Derived1::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - - static Scalar run(const Derived1& v1, const Derived2& v2) - { - const int size = v1.size(); - const int packetSize = ei_packet_traits::size; - const int alignedSize = (size/packetSize)*packetSize; - enum { - alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned, - alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned - }; - Scalar res; - - // do the vectorizable part of the sum - if(size >= packetSize) - { - PacketScalar packet_res = ei_pmul( - v1.template packet(0), - v2.template packet(0) - ); - for(int index = packetSize; index(index), - v2.template packet(index), - packet_res - ); - } - res = ei_predux(packet_res); - - // now we must do the rest without vectorization. - if(alignedSize == size) return res; - } - else // too small to vectorize anything. - // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. - { - res = Scalar(0); - } - - // do the remainder of the vector - for(int index = alignedSize; index < size; ++index) - { - res += v1.coeff(index) * v2.coeff(index); - } - - return res; - } -}; - -template -struct ei_dot_impl -{ - typedef typename Derived1::Scalar Scalar; - typedef typename ei_packet_traits::type PacketScalar; - enum { - PacketSize = ei_packet_traits::size, - Size = Derived1::SizeAtCompileTime, - VectorizedSize = (Size / PacketSize) * PacketSize - }; - static Scalar run(const Derived1& v1, const Derived2& v2) - { - Scalar res = ei_predux(ei_dot_vec_unroller::run(v1, v2)); - if (VectorizedSize != Size) - res += ei_dot_novec_unroller::run(v1, v2); - return res; - } -}; - -/*************************************************************************** -* Part 4 : implementation of MatrixBase methods -***************************************************************************/ - /** \returns the dot product of *this with other. * * \only_for_vectors @@ -266,10 +70,7 @@ MatrixBase::dot(const MatrixBase& other) const ei_assert(size() == other.size()); - // dot() must honor EvalBeforeNestingBit (eg: v.dot(M*v) ) - typedef typename ei_cleantype::type ThisNested; - typedef typename ei_cleantype::type OtherNested; - return ei_dot_impl::run(derived(), other.derived()); + return ei_dot_nocheck::run(*this, other); } /** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself. diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h index 3343b1875..e8016e915 100644 --- a/Eigen/src/Core/products/CoeffBasedProduct.h +++ b/Eigen/src/Core/products/CoeffBasedProduct.h @@ -305,10 +305,7 @@ struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int row, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Block::ColsAtCompileTime>, - Block::RowsAtCompileTime, 1>, - LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs.col(col)); + res = lhs.row(row).cwiseProduct(rhs.col(col)).sum(); } }; @@ -319,10 +316,7 @@ struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int /*row*/, int col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Lhs, - Block::RowsAtCompileTime, 1>, - LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs.col(col)); + res = lhs.cwiseProduct(rhs.col(col)).sum(); } }; @@ -331,10 +325,7 @@ struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int row, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Block::ColsAtCompileTime>, - Rhs, - LinearVectorizedTraversal, NoUnrolling>::run(lhs.row(row), rhs); + res = lhs.row(row).cwiseProduct(rhs).sum(); } }; @@ -343,10 +334,7 @@ struct ei_product_coeff_vectorized_dyn_selector { EIGEN_STRONG_INLINE static void run(int /*row*/, int /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res) { - res = ei_dot_impl< - Lhs, - Rhs, - LinearVectorizedTraversal, NoUnrolling>::run(lhs, rhs); + res = lhs.cwiseProduct(rhs).sum(); } };