mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-29 23:34:12 +08:00
bug #958 - Allow testing specific blocking sizes
This is only a debugging/testing patch. It allows testing specific product blocking sizes, typically to study their impact on performance. Example usage:

    int testk, testm, testn;
    #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
    #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K testk
    #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M testm
    #define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N testn
    #include <Eigen/Core>
This commit is contained in:
parent
c7bb1e8ea8
commit
4a3e6c8be1
@ -84,6 +84,22 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff
|
|||||||
template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
|
template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
|
||||||
void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads)
|
void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n, int num_threads)
|
||||||
{
|
{
|
||||||
|
#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
  // Debugging/testing override: when EIGEN_TEST_SPECIFIC_BLOCKING_SIZES is
  // defined, the regular computation further down is skipped entirely. Each of
  // k, m and n is clamped to the user-supplied
  // EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_{K,M,N} value and the function returns.
  // num_threads is not used on this path.
  EIGEN_UNUSED_VARIABLE(num_threads);
  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
  enum {
    kr = 16,          // granularity applied to k on this path
    mr = Traits::mr,  // granularity applied to m, taken from the gebp traits
    nr = Traits::nr   // granularity applied to n, taken from the gebp traits
  };
  // Every dimension is bounded by its *own* incoming value and its own
  // requested size; a result larger than the matching granularity is then
  // rounded down to a multiple of it (smaller results are left untouched).
  k = std::min<SizeType>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
  if (k > kr) k -= k % kr;
  m = std::min<SizeType>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
  if (m > mr) m -= m % mr;
  n = std::min<SizeType>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
  if (n > nr) n -= n % nr;
  return;
#endif
|
||||||
// Explanations:
|
// Explanations:
|
||||||
// Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
|
// Let's recall the product algorithms form kc x nc horizontal panels B' on the rhs and
|
||||||
// mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
|
// mc x kc blocks A' on the lhs. A' has to fit into L2 cache. Moreover, B' is processed
|
||||||
|
Loading…
x
Reference in New Issue
Block a user