From 0ecae61568861b15fb6d189947007ba3c379ee10 Mon Sep 17 00:00:00 2001 From: Chip Kerchner Date: Fri, 10 Feb 2023 20:06:58 +0000 Subject: [PATCH] Disable array BF16 to F32 conversions in Power --- .../arch/AltiVec/MatrixProductMMAbfloat16.h | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h b/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h index 91c1dd764..392e027c7 100644 --- a/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h +++ b/Eigen/src/Core/arch/AltiVec/MatrixProductMMAbfloat16.h @@ -189,27 +189,11 @@ void gemmMMAbfloat16(const DataMapper& res, const bfloat16* blockA, const bfloat ei_declare_aligned_stack_constructed_variable(float, result, cols*rows, 0); typedef typename DataMapper::LinearMapper LinearMapper; - Packet4f z = pset1(float(0)); for(Index j = 0; j < cols; j++){ const LinearMapper res2 = res.getLinearMapper(0, j); float *result2 = result + j*rows; - Index i = 0; - for(; i + 32 <= rows; i+=32){ - Packet4f r32_0 = reinterpret_cast(res2.template loadPacket(i + 0).m_val); - Packet4f r32_1 = reinterpret_cast(res2.template loadPacket(i + 8).m_val); - Packet4f r32_2 = reinterpret_cast(res2.template loadPacket(i + 16).m_val); - Packet4f r32_3 = reinterpret_cast(res2.template loadPacket(i + 24).m_val); - pstore(result2 + i + 0, vec_mergeo(r32_0, z)); - pstore(result2 + i + 4, vec_mergee(r32_0, z)); - pstore(result2 + i + 8, vec_mergeo(r32_1, z)); - pstore(result2 + i + 12, vec_mergee(r32_1, z)); - pstore(result2 + i + 16, vec_mergeo(r32_2, z)); - pstore(result2 + i + 20, vec_mergee(r32_2, z)); - pstore(result2 + i + 24, vec_mergeo(r32_3, z)); - pstore(result2 + i + 28, vec_mergee(r32_3, z)); - } BFLOAT16_UNROLL - for(; i < rows; i++){ + for(Index i = 0; i < rows; i++){ result2[i] = res2(i); } }