diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 967deaffb..a099160c2 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -75,14 +75,14 @@ struct ei_product_triangular_matrix_matrix - ::run(rows, cols, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); + ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); } }; @@ -138,6 +138,7 @@ struct ei_product_triangular_matrix_matrixrows)) { actual_kc = rows-k2; @@ -191,7 +192,7 @@ struct ei_product_triangular_matrix_matrix GEPP { Index start = IsLower ? k2 : 0; - Index end = IsLower ? rows : actual_k2; + Index end = IsLower ? rows : std::min(actual_k2,rows); for(Index i2=start; i20; IsLower ? k2+=kc : k2-=kc) { - const Index actual_kc = std::min(IsLower ? depth-k2 : k2, kc); + Index actual_kc = std::min(IsLower ? depth-k2 : k2, kc); Index actual_k2 = IsLower ? k2 : k2-actual_kc; - Index rs = IsLower ? actual_k2 : depth - k2; - Scalar* geb = blockB+actual_kc*actual_kc; + + // align blocks with the end of the triangular part for trapezoidal rhs + if(IsLower && (k2cols)) + { + actual_kc = cols-k2; + k2 = actual_k2 + actual_kc - kc; + } + + // remaining size + Index rs = IsLower ? std::min(cols,actual_k2) : cols - k2; + // size of the triangular part + Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc; + + Scalar* geb = blockB+ts*ts; pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, alpha, actual_kc, rs); // pack the triangular part of the rhs padding the unrolled blocks with zeros + if(ts>0) { for (Index j2=0; j20) { for (Index j2=0; j2 void trmm(int size,int othersize) +template void trmm(int size,int /*othersize*/) { typedef typename NumTraits::Real RealScalar; - typedef Matrix MatrixType; + typedef Matrix MatrixColMaj; + typedef Matrix MatrixRowMaj; - MatrixType tri(size,size), upTri(size,size), loTri(size,size), - unitUpTri(size,size), unitLoTri(size,size); - MatrixType ge1(size,othersize), ge2(10,size), ge3; - Matrix rge3; + int rows = size; + int cols = ei_random(1,size); + + MatrixColMaj triV(rows,cols), triH(cols,rows), upTri(cols,rows), loTri(rows,cols), + unitUpTri(cols,rows), unitLoTri(rows,cols); + MatrixColMaj ge1(rows,cols), ge2(cols,rows), ge3; + MatrixRowMaj rge3; Scalar s1 = ei_random(), s2 = ei_random(); - tri.setRandom(); - loTri = tri.template triangularView(); - upTri = tri.template triangularView(); - unitLoTri = tri.template triangularView(); - unitUpTri = tri.template triangularView(); + triV.setRandom(); + triH.setRandom(); + loTri = triV.template triangularView(); + upTri = triH.template triangularView(); + unitLoTri = triV.template triangularView(); + unitUpTri = triH.template triangularView(); ge1.setRandom(); ge2.setRandom(); - VERIFY_IS_APPROX( ge3 = tri.template triangularView() * ge1, loTri * ge1); - VERIFY_IS_APPROX(rge3 = tri.template triangularView() * ge1, loTri * ge1); - VERIFY_IS_APPROX( ge3 = tri.template triangularView() * ge1, upTri * ge1); - VERIFY_IS_APPROX(rge3 = tri.template triangularView() * ge1, upTri * ge1); - VERIFY_IS_APPROX( ge3 = (s1*tri.adjoint()).template triangularView() * (s2*ge1), s1*loTri.adjoint() * (s2*ge1)); - VERIFY_IS_APPROX(rge3 = tri.adjoint().template triangularView() * ge1, loTri.adjoint() * ge1); - VERIFY_IS_APPROX( ge3 = tri.adjoint().template triangularView() * ge1, upTri.adjoint() * ge1); - VERIFY_IS_APPROX(rge3 = tri.adjoint().template triangularView() * ge1, upTri.adjoint() * ge1); - VERIFY_IS_APPROX( ge3 = tri.template triangularView() * ge2.adjoint(), loTri * ge2.adjoint()); - VERIFY_IS_APPROX(rge3 = tri.template triangularView() * ge2.adjoint(), loTri * ge2.adjoint()); - VERIFY_IS_APPROX( ge3 = tri.template triangularView() * ge2.adjoint(), upTri * ge2.adjoint()); - VERIFY_IS_APPROX(rge3 = tri.template triangularView() * ge2.adjoint(), upTri * ge2.adjoint()); - VERIFY_IS_APPROX( ge3 = (s1*tri).adjoint().template triangularView() * ge2.adjoint(), ei_conj(s1) * loTri.adjoint() * ge2.adjoint()); - VERIFY_IS_APPROX(rge3 = tri.adjoint().template triangularView() * ge2.adjoint(), loTri.adjoint() * ge2.adjoint()); - VERIFY_IS_APPROX( ge3 = tri.adjoint().template triangularView() * ge2.adjoint(), upTri.adjoint() * ge2.adjoint()); - VERIFY_IS_APPROX(rge3 = tri.adjoint().template triangularView() * ge2.adjoint(), upTri.adjoint() * ge2.adjoint()); + VERIFY_IS_APPROX( ge3 = triV.template triangularView() * ge2, loTri * ge2); + VERIFY_IS_APPROX( ge3 = ge2 * triV.template triangularView(), ge2 * loTri); + VERIFY_IS_APPROX( ge3 = triH.template triangularView() * ge1, upTri * ge1); + VERIFY_IS_APPROX( ge3 = ge1 * triH.template triangularView(), ge1 * upTri); + VERIFY_IS_APPROX( ge3 = (s1*triV.adjoint()).template triangularView() * (s2*ge1), s1*loTri.adjoint() * (s2*ge1)); + VERIFY_IS_APPROX( ge3 = ge1 * triV.adjoint().template triangularView(), ge1 * loTri.adjoint()); + VERIFY_IS_APPROX( ge3 = triH.adjoint().template triangularView() * ge2, upTri.adjoint() * ge2); + VERIFY_IS_APPROX( ge3 = ge2 * triH.adjoint().template triangularView(), ge2 * upTri.adjoint()); + VERIFY_IS_APPROX( ge3 = triV.template triangularView() * ge1.adjoint(), loTri * ge1.adjoint()); + VERIFY_IS_APPROX( ge3 = ge1.adjoint() * triV.template triangularView(), ge1.adjoint() * loTri); + VERIFY_IS_APPROX( ge3 = triH.template triangularView() * ge2.adjoint(), upTri * ge2.adjoint()); + VERIFY_IS_APPROX(rge3.noalias() = triH.template triangularView() * ge2.adjoint(), upTri * ge2.adjoint()); + VERIFY_IS_APPROX( ge3 = (s1*triV).adjoint().template triangularView() * ge2.adjoint(), ei_conj(s1) * loTri.adjoint() * ge2.adjoint()); + VERIFY_IS_APPROX(rge3.noalias() = triV.adjoint().template triangularView() * ge2.adjoint(), loTri.adjoint() * ge2.adjoint()); + VERIFY_IS_APPROX( ge3 = triH.adjoint().template triangularView() * ge1.adjoint(), upTri.adjoint() * ge1.adjoint()); + VERIFY_IS_APPROX(rge3.noalias() = triH.adjoint().template triangularView() * ge1.adjoint(), upTri.adjoint() * ge1.adjoint()); - VERIFY_IS_APPROX( ge3 = tri.template triangularView() * ge1, unitLoTri * ge1); - VERIFY_IS_APPROX(rge3 = tri.template triangularView() * ge1, unitLoTri * ge1); - VERIFY_IS_APPROX( ge3 = (s1*tri).adjoint().template triangularView() * ge2.adjoint(), ei_conj(s1) * unitLoTri.adjoint() * ge2.adjoint()); + VERIFY_IS_APPROX( ge3 = triV.template triangularView() * ge2, unitLoTri * ge2); + VERIFY_IS_APPROX( rge3.noalias() = ge2 * triV.template triangularView(), ge2 * unitLoTri); + VERIFY_IS_APPROX( ge3 = ge2 * triV.template triangularView(), ge2 * unitLoTri); + VERIFY_IS_APPROX( ge3 = (s1*triV).adjoint().template triangularView() * ge2.adjoint(), ei_conj(s1) * unitLoTri.adjoint() * ge2.adjoint()); } void test_product_trmm()