mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-20 16:49:38 +08:00
Fix a regression when using OpenMP, and fix bug #714: the actual number of threads might be lower than the number requested
This commit is contained in:
parent
548b781380
commit
c7bb1e8ea8
@ -217,8 +217,9 @@ struct gemm_functor
|
|||||||
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
|
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
void initParallelSession() const
|
void initParallelSession(Index num_threads) const
|
||||||
{
|
{
|
||||||
|
m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
|
||||||
m_blocking.allocateA();
|
m_blocking.allocateA();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -276,7 +277,7 @@ class level3_blocking
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
|
template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
|
||||||
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true>
|
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>
|
||||||
: public level3_blocking<
|
: public level3_blocking<
|
||||||
typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
|
typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
|
||||||
typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
|
typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
|
||||||
@ -299,7 +300,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, int /*num_threads*/, bool /*full_rows = false*/)
|
gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
|
||||||
{
|
{
|
||||||
this->m_mc = ActualRows;
|
this->m_mc = ActualRows;
|
||||||
this->m_nc = ActualCols;
|
this->m_nc = ActualCols;
|
||||||
@ -307,6 +308,9 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
|||||||
this->m_blockA = m_staticA;
|
this->m_blockA = m_staticA;
|
||||||
this->m_blockB = m_staticB;
|
this->m_blockB = m_staticB;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void initParallel(Index, Index, Index, Index)
|
||||||
|
{}
|
||||||
|
|
||||||
inline void allocateA() {}
|
inline void allocateA() {}
|
||||||
inline void allocateB() {}
|
inline void allocateB() {}
|
||||||
@ -331,7 +335,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
gemm_blocking_space(Index rows, Index cols, Index depth, int num_threads, bool l3_blocking)
|
gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
|
||||||
{
|
{
|
||||||
this->m_mc = Transpose ? cols : rows;
|
this->m_mc = Transpose ? cols : rows;
|
||||||
this->m_nc = Transpose ? rows : cols;
|
this->m_nc = Transpose ? rows : cols;
|
||||||
@ -351,6 +355,19 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
|||||||
m_sizeA = this->m_mc * this->m_kc;
|
m_sizeA = this->m_mc * this->m_kc;
|
||||||
m_sizeB = this->m_kc * this->m_nc;
|
m_sizeB = this->m_kc * this->m_nc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void initParallel(Index rows, Index cols, Index depth, Index num_threads)
|
||||||
|
{
|
||||||
|
this->m_mc = Transpose ? cols : rows;
|
||||||
|
this->m_nc = Transpose ? rows : cols;
|
||||||
|
this->m_kc = depth;
|
||||||
|
|
||||||
|
eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
|
||||||
|
Index m = this->m_mc;
|
||||||
|
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
|
||||||
|
m_sizeA = this->m_mc * this->m_kc;
|
||||||
|
m_sizeB = this->m_kc * this->m_nc;
|
||||||
|
}
|
||||||
|
|
||||||
void allocateA()
|
void allocateA()
|
||||||
{
|
{
|
||||||
|
@ -120,25 +120,28 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
|
|||||||
return func(0,rows, 0,cols);
|
return func(0,rows, 0,cols);
|
||||||
|
|
||||||
Eigen::initParallel();
|
Eigen::initParallel();
|
||||||
func.initParallelSession();
|
func.initParallelSession(threads);
|
||||||
|
|
||||||
if(transpose)
|
if(transpose)
|
||||||
std::swap(rows,cols);
|
std::swap(rows,cols);
|
||||||
|
|
||||||
Index blockCols = (cols / threads) & ~Index(0x3);
|
|
||||||
Index blockRows = (rows / threads);
|
|
||||||
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
|
|
||||||
|
|
||||||
ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
|
ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
|
||||||
|
|
||||||
#pragma omp parallel num_threads(threads)
|
#pragma omp parallel num_threads(threads)
|
||||||
{
|
{
|
||||||
Index i = omp_get_thread_num();
|
Index i = omp_get_thread_num();
|
||||||
|
// Note that the actual number of threads might be lower than the number of requested ones.
|
||||||
|
Index actual_threads = omp_get_num_threads();
|
||||||
|
|
||||||
|
Index blockCols = (cols / actual_threads) & ~Index(0x3);
|
||||||
|
Index blockRows = (rows / actual_threads);
|
||||||
|
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
|
||||||
|
|
||||||
Index r0 = i*blockRows;
|
Index r0 = i*blockRows;
|
||||||
Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
|
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
|
||||||
|
|
||||||
Index c0 = i*blockCols;
|
Index c0 = i*blockCols;
|
||||||
Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
|
Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
|
||||||
|
|
||||||
info[i].lhs_start = r0;
|
info[i].lhs_start = r0;
|
||||||
info[i].lhs_length = actualBlockRows;
|
info[i].lhs_length = actualBlockRows;
|
||||||
|
@ -64,8 +64,7 @@ void test_product_large()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Regression test for bug 714:
|
// Regression test for bug 714:
|
||||||
#ifdef EIGEN_HAS_OPENMP
|
#if defined EIGEN_HAS_OPENMP
|
||||||
std::cout << "Testing omp_set_dynamic(1)\n";
|
|
||||||
omp_set_dynamic(1);
|
omp_set_dynamic(1);
|
||||||
for(int i = 0; i < g_repeat; i++) {
|
for(int i = 0; i < g_repeat; i++) {
|
||||||
CALL_SUBTEST_6( product(Matrix<float,Dynamic,Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
CALL_SUBTEST_6( product(Matrix<float,Dynamic,Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) );
|
||||||
|
Loading…
x
Reference in New Issue
Block a user