From 26234720bd9558ee38c9b8f76323160c05178011 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 28 Feb 2015 15:25:39 +0100 Subject: [PATCH] Fix bug #972: allow coeff-based products of depth 0 and remove a useless statement in coeff-based product. --- Eigen/src/Core/products/CoeffBasedProduct.h | 51 ++++++++++++++++----- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h index 421f925e1..763797f34 100644 --- a/Eigen/src/Core/products/CoeffBasedProduct.h +++ b/Eigen/src/Core/products/CoeffBasedProduct.h @@ -134,7 +134,7 @@ class CoeffBasedProduct }; typedef internal::product_coeff_impl ScalarCoeffImpl; typedef CoeffBasedProduct LazyCoeffBasedProductType; @@ -185,7 +185,7 @@ class CoeffBasedProduct { PacketScalar res; internal::product_packet_impl ::run(row, col, m_lhs, m_rhs, res); return res; @@ -243,7 +243,17 @@ struct product_coeff_impl static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) { product_coeff_impl::run(row, col, lhs, rhs, res); - res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col); + res += lhs.coeff(row, UnrollingIndex-1) * rhs.coeff(UnrollingIndex-1, col); + } +}; + +template +struct product_coeff_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + { + res = lhs.coeff(row, 0) * rhs.coeff(0, col); } }; @@ -251,9 +261,9 @@ template struct product_coeff_impl { typedef typename Lhs::Index Index; - static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res) + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res) { - res = lhs.coeff(row, 0) * rhs.coeff(0, col); + res = RetScalar(0); } }; @@ -293,6 +303,16 @@ struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet> } }; +template +struct product_coeff_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, RetScalar &res) + { + res = 0; + } +}; + template struct product_coeff_impl { @@ -302,8 +322,7 @@ struct product_coeff_impl::run(row, col, lhs, rhs, pres); - product_coeff_impl::run(row, col, lhs, rhs, res); + product_coeff_vectorized_unroller::run(row, col, lhs, rhs, pres); res = predux(pres); } }; @@ -371,7 +390,7 @@ struct product_packet_impl static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { product_packet_impl::run(row, col, lhs, rhs, res); - res = pmadd(pset1(lhs.coeff(row, UnrollingIndex)), rhs.template packet(UnrollingIndex, col), res); + res = pmadd(pset1(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet(UnrollingIndex-1, col), res); } }; @@ -382,12 +401,12 @@ struct product_packet_impl static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) { product_packet_impl::run(row, col, lhs, rhs, res); - res = pmadd(lhs.template packet(row, UnrollingIndex), pset1(rhs.coeff(UnrollingIndex, col)), res); + res = pmadd(lhs.template packet(row, UnrollingIndex-1), pset1(rhs.coeff(UnrollingIndex-1, col)), res); } }; template -struct product_packet_impl +struct product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) @@ -397,7 +416,7 @@ struct product_packet_impl }; template -struct product_packet_impl +struct product_packet_impl { typedef typename Lhs::Index Index; static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res) @@ -406,6 +425,16 @@ struct product_packet_impl } }; +template +struct product_packet_impl +{ + typedef typename Lhs::Index Index; + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Packet &res) + { + res = pset1(0); + } +}; + template struct product_packet_impl {