From 450d0c3de044c9f32fa2f37fee821f6e390df382 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Fri, 25 Apr 2014 22:25:48 +0200
Subject: [PATCH] Make sure that calls to broadcast4 are 16 bytes aligned

---
 Eigen/src/Core/arch/SSE/PacketMath.h             | 6 +++---
 Eigen/src/Core/products/TriangularMatrixMatrix.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index ad935d5f1..6912f3bc3 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -486,7 +486,7 @@ template<> EIGEN_STRONG_INLINE void
 pbroadcast4(const float *a,
             Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
 {
-  a3 = ploadu(a);
+  a3 = pload(a);
   a0 = vec4f_swizzle1(a3, 0,0,0,0);
   a1 = vec4f_swizzle1(a3, 1,1,1,1);
   a2 = vec4f_swizzle1(a3, 2,2,2,2);
@@ -502,10 +502,10 @@ pbroadcast4(const double *a,
   a2 = _mm_loaddup_pd(a+2);
   a3 = _mm_loaddup_pd(a+3);
 #else
-  a1 = ploadu(a);
+  a1 = pload(a);
   a0 = vec2d_swizzle1(a1, 0,0);
   a1 = vec2d_swizzle1(a1, 1,1);
-  a3 = ploadu(a+2);
+  a3 = pload(a+2);
   a2 = vec2d_swizzle1(a3, 0,0);
   a3 = vec2d_swizzle1(a3, 1,1);
 #endif
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 62575aff4..8088aa691 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -300,6 +300,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix=cols) ? 0 : actual_kc;
         Scalar* geb = blockB+ts*ts;
+        geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
         pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);