diff --git a/Eigen/Core b/Eigen/Core index cbca16640..26195cd35 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -89,6 +89,14 @@ #endif #endif +#ifdef _OPENMP + #define EIGEN_HAS_OPENMP +#endif + +#ifdef EIGEN_HAS_OPENMP +#include +#endif + #include #include #include @@ -209,6 +217,7 @@ struct Dense {}; #include "src/Core/TriangularMatrix.h" #include "src/Core/SelfAdjointView.h" #include "src/Core/SolveTriangular.h" +#include "src/Core/products/Parallelizer.h" #include "src/Core/products/CoeffBasedProduct.h" #include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/GeneralMatrixVector.h" diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 7f449ac23..c13e09eac 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -128,33 +128,13 @@ struct ei_traits > : ei_traits, Lhs, Rhs> > {}; -template -void ei_multithreaded_product(const Functor& func, int size) +template +struct ei_gemm_functor { - if(!Prallelize) - return func(0,size); - #ifdef OMP - int threads = omp_get_num_procs(); - #else - int threads = 1; - #endif - int blockSize = size / threads; - #pragma omp parallel for schedule(static,1) - for(int i=0; i struct ei_gemm_callback -{ - ei_gemm_callback(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha) + ei_gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha) : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha) {} - + void operator() (int start, int size) const { Gemm::run(m_lhs.rows(), size, m_lhs.cols(), @@ -194,28 +174,18 @@ class GeneralProduct Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) * RhsBlasTraits::extractScalarFactor(m_rhs); - typedef ei_gemm_callback, - _ActualLhsType, _ActualRhsType, Dest> Functor; - - #ifdef OMP - ei_multithreaded_product(Functor(lhs, rhs, dst, actualAlpha), this->cols()); - #else + typedef ei_gemm_functor< + Scalar, 
ei_general_matrix_matrix_product< Scalar, (_ActualLhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate), (_ActualRhsType::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate), - (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor> - ::run( - this->rows(), this->cols(), lhs.cols(), - (const Scalar*)&(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(), - (const Scalar*)&(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(), - (Scalar*)&(dst.coeffRef(0,0)), dst.stride(), - actualAlpha); - #endif + (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>, + _ActualLhsType, + _ActualRhsType, + Dest> Functor; + + ei_run_parallel_1d(Functor(lhs, rhs, dst, actualAlpha), this->cols()); } }; diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h new file mode 100644 index 000000000..d555508b2 --- /dev/null +++ b/Eigen/src/Core/products/Parallelizer.h @@ -0,0 +1,50 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +#ifndef EIGEN_PARALLELIZER_H +#define EIGEN_PARALLELIZER_H + +template +void ei_run_parallel_1d(const Functor& func, int size) +{ +#ifndef EIGEN_HAS_OPENMP + func(0,size); +#else + if(!Parallelize) + return func(0,size); + + int threads = omp_get_num_procs(); + int blockSize = size / threads; + #pragma omp parallel for schedule(static,1) + for(int i=0; i