From 2b7b7aac57130228c8057aac7c4782bd1278c923 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen
Date: Fri, 2 Aug 2024 20:40:53 +0000
Subject: [PATCH] Speed up complex * complex matrix multiplication.

---
 Eigen/src/Core/products/GeneralBlockPanelKernel.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 1b7861ab1..b65c246e7 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -718,10 +718,10 @@ class gebp_traits, std::complex, ConjLhs_,
     LhsPacketSize = Vectorizable ? unpacket_traits::size : 1,
     RhsPacketSize = Vectorizable ? unpacket_traits::size : 1,
     RealPacketSize = Vectorizable ? unpacket_traits::size : 1,
+    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
 
-    // FIXME: should depend on NumberOfRegisters
     nr = 4,
-    mr = ResPacketSize,
+    mr = (plain_enum_min(16, NumberOfRegisters) / 2 / nr) * ResPacketSize,
 
     LhsProgress = ResPacketSize,
     RhsProgress = 1
@@ -795,8 +795,8 @@ class gebp_traits, std::complex, ConjLhs_,
       DoublePacket& c, TmpType& /*tmp*/, const LaneIdType&) const {
-    c.first = padd(pmul(a, b.first), c.first);
-    c.second = padd(pmul(a, b.second), c.second);
+    c.first = pmadd(a, b.first, c.first);
+    c.second = pmadd(a, b.second, c.second);
   }
   template