diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 83cec500f..6e6452099 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -699,6 +699,26 @@ protected: DstXprType& m_dstExpr; }; +// Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the +// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used +// when computing the product. + +template +class restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel +{ +protected: + typedef generic_dense_assignment_kernel Base; + public: + typedef typename Base::Scalar Scalar; + typedef typename Base::DstXprType DstXprType; + typedef typename find_best_packet::type PacketType; + + EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr) + : Base(dst, src, func, dstExpr) + { + } + }; + /*************************************************************************** * Part 5 : Entry point for dense rectangular assignment ***************************************************************************/ @@ -837,6 +857,27 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) Assignment::run(actualDst, src, func); } + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func) +{ + typedef evaluator DstEvaluatorType; + typedef evaluator SrcEvaluatorType; + typedef restricted_packet_dense_assignment_kernel Kernel; + + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar); + + SrcEvaluatorType srcEvaluator(src); + resize_if_allowed(dst, src, func); + + DstEvaluatorType dstEvaluator(dst); + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + dense_assignment_loop::run(kernel); +} + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias(Dst& dst, const Src& src) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 0762d9e8b..246bca3e5 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -424,7 +424,7 @@ struct generic_product_impl void eval_dynamic(Dst& dst, const CwiseBinaryOp, const CwiseNullaryOp, Plain1>, Xpr2>& lhs, const Rhs& rhs, const Func &func) { - call_assignment_no_alias(dst, lhs.lhs().functor().m_other * lhs.rhs().lazyProduct(rhs), func); + call_restricted_packet_assignment_no_alias(dst, lhs.lhs().functor().m_other * lhs.rhs().lazyProduct(rhs), func); } // Here, we we always have LhsT==Lhs, but we need to make it a template type to make the above @@ -433,7 +433,7 @@ struct generic_product_impl static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic(Dst& dst, const LhsT& lhs, const Rhs& rhs, const Func &func) { - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), func); + call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func); }