diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index edd79bd9a..e929b8d89 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -298,8 +298,7 @@ class GeneralProduct { ei_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); ei_gemv_selector::HasUsableDirectAccess) - /*&& ei_is_same_type::ret*/>::run(*this, dst, alpha); + bool(ei_blas_traits::HasUsableDirectAccess)>::run(*this, dst, alpha); } }; @@ -357,7 +356,7 @@ template<> struct ei_gemv_selector ::run( actualLhs.rows(), actualLhs.cols(), &actualLhs.const_cast_derived().coeffRef(0,0), actualLhs.outerStride(), - actualRhs, actualRhs.innerStride(), + actualRhs.data(), actualRhs.innerStride(), actualDest, 1, actualAlpha); diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h index f9e24a193..c25317989 100644 --- a/Eigen/src/Core/SolveTriangular.h +++ b/Eigen/src/Core/SolveTriangular.h @@ -151,7 +151,7 @@ struct ei_triangular_solver_selector::run( r, actualPanelWidth, &(actualLhs.const_cast_derived().coeffRef(endBlock,startBlock)), actualLhs.outerStride(), - other.segment(startBlock, actualPanelWidth), other.innerStride(), + &other.coeff(startBlock), other.innerStride(), &(other.coeffRef(endBlock, 0)), other.innerStride(), Scalar(-1)); } } diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index ffb4cd386..9214582ed 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -134,12 +134,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st } // FIXME -// #ifdef EIGEN_HAS_FUSE_CJMADD +#ifndef EIGEN_HAS_FUSE_CJMADD +#define EIGEN_HAS_FUSE_CJMADD +#endif +#ifdef EIGEN_HAS_FUSE_CJMADD #define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C); -// #else - //#define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T)); -// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); -// #endif +#else + #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,ResPacket(T)); +#endif // optimized GEneral packed Block * packed Panel product kernel template @@ -712,7 +714,9 @@ EIGEN_ASM_COMMENT("myend"); const RhsScalar* blB = unpackedB; for(Index k=0; k(blA), ei_pload(blB), C0, T0); blB += RhsPacketSize; blA += LhsPacketSize; diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index e0d71be7e..d772834a2 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -53,15 +53,13 @@ typedef typename ei_meta_if::ret LhsPacket; typedef typename ei_meta_if::ret RhsPacket; typedef typename ei_meta_if::ret ResPacket; -template EIGEN_DONT_INLINE static void run( Index rows, Index cols, const LhsScalar* lhs, Index lhsStride, - const RhsType&/*const RhsScalar**/ rhs, Index rhsIncr, + const RhsScalar* rhs, Index rhsIncr, ResScalar* res, Index resIncr, ResScalar alpha) { - EIGEN_UNUSED_VARIABLE(rhsIncr); ei_internal_assert(resIncr==1); #ifdef _EIGEN_ACCUMULATE_PACKETS #error _EIGEN_ACCUMULATE_PACKETS has already been defined @@ -147,8 +145,10 @@ EIGEN_DONT_INLINE static void run( Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; for (Index i=skipColumns; i(alpha*rhs[i]), ptmp1 = ei_pset1(alpha*rhs[i+offset1]), - ptmp2 = ei_pset1(alpha*rhs[i+2]), ptmp3 = ei_pset1(alpha*rhs[i+offset3]); + RhsPacket ptmp0 = ei_pset1(alpha*rhs[i*rhsIncr]), + ptmp1 = ei_pset1(alpha*rhs[(i+offset1)*rhsIncr]), + ptmp2 = ei_pset1(alpha*rhs[(i+2)*rhsIncr]), + ptmp3 = ei_pset1(alpha*rhs[(i+offset3)*rhsIncr]); // this helps a lot generating better binary code const LhsScalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride, @@ -239,7 +239,7 @@ EIGEN_DONT_INLINE static void run( { for (Index i=start; i(alpha*rhs[i]); + RhsPacket ptmp0 = ei_pset1(alpha*rhs[i*rhsIncr]); const LhsScalar* lhs0 = lhs + i*lhsStride; if (Vectorizable) diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index 16b02a425..67c131ab2 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -79,7 +79,7 @@ struct ei_product_triangular_vector_selector::run( r, actualPanelWidth, &(lhs.const_cast_derived().coeffRef(s,pi)), lhs.outerStride(), - rhs.segment(pi, actualPanelWidth), rhs.innerStride(), + &rhs.coeff(pi), rhs.innerStride(), &res.coeffRef(s), res.innerStride(), alpha); } }