The actual_panel_rows computation should always be resilient to blocking parameters that are not consistent with the known L1 cache size; see the comment added in the code.

This commit is contained in:
Benoit Jacob 2015-03-15 18:12:18 -04:00
parent 1dd3d89818
commit 3589a9c115

View File

@ -1017,11 +1017,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
// However, if depth is too small, we can extend the number of rows of these horizontal panels. // However, if depth is too small, we can extend the number of rows of these horizontal panels.
// This actual number of rows is computed as follows: // This actual number of rows is computed as follows:
const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function. const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES // The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
// suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
// or because we are testing specific blocking sizes.
const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) )); const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
#else
const Index actual_panel_rows = (3*LhsProgress) * ( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) );
#endif
for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows) for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
{ {
const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3); const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);