From e1a6bad087f277ef11f6ec911e412591c1beacbd Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 23 Jun 2010 16:34:51 +0200 Subject: [PATCH] fix cache queries for non core2 CPU ;) --- Eigen/src/Core/util/Memory.h | 114 ++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 5bad8e37e..93214d3a6 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -592,73 +592,87 @@ public: # define EIGEN_CPUID(abcd,func) \ __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func)); # else -# define EIGEN_CPUID(abcd,func) \ - __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func) ); +# define EIGEN_CPUID(abcd,func,id) \ + __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) ); # endif #elif defined(_MSC_VER) # define EIGEN_CPUID(abcd,func) __cpuid((int*)abcd,func) #endif /** \internal - * \returns the size in Bytes of the L1 data cache */ -inline std::ptrdiff_t ei_queryL1CacheSize() + * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */ +inline void ei_queryCacheSizes(int& l1, int& l2, int& l3) { #ifdef EIGEN_CPUID int abcd[4]; - - // try the direct method using extended level - EIGEN_CPUID(abcd,0x80000005); - std::ptrdiff_t l1 = std::ptrdiff_t(abcd[2] >> 24) * 1024; - - if(l1>0) - return l1*1024; - - // it fails, try using the standard level - EIGEN_CPUID(abcd,0x00000002); - unsigned char * bytes = reinterpret_cast(abcd)+2; - for(int i=0; i<14; ++i) + + const char GenuineIntel_char[] = "GenuntelineI"; + const int* GenuineIntel = (int*)GenuineIntel_char; + + const char AuthenticAMD_char[] = "AuthcAMDenti"; + const int* AuthenticAMD = (int*)AuthenticAMD_char; + + // Step 1: identify the CPU model + 
EIGEN_CPUID(abcd,0x0,0); + if(abcd[1]==GenuineIntel[0] && abcd[2]==GenuineIntel[1] && abcd[3]==GenuineIntel[2]) { - switch(bytes[i]) - { - case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines - case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines - case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines - case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64) - case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64) - case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines - case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines -// 56h L0 data TLB, 4M pages, 4 ways, 16 entries -// 57h L0 data TLB, 4K pages, 4 ways, 16 entries -// 59h L0 data TLB, 4K pages, fully, 16 entries - case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored - case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored - case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored - case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored -// 77h code L1 cache, 16 KB, 4 ways, 64 byte lines, sectored (IA-64) -// 96h data L1 TLB, 4K...256M pages, fully, 32 entries (IA-64) - default: break; - } + // use Intel's cpuid API + l1 = l2 = l3 = 0; + int cache_id = 0; + int cache_type = 0; + do { + EIGEN_CPUID(abcd,0x4,cache_id); + cache_type = (abcd[0] & 0x0F) >> 0; + if(cache_type==1||cache_type==3) // data or unified cache + { + int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5] + int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22] + int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12] + int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0] + int sets = (abcd[2]); // C[31:0] + + int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1); + + switch(cache_level) + { + case 1: l1 = cache_size; 
break; + case 2: l2 = cache_size; break; + case 3: l3 = cache_size; break; + default: break; + } + } + cache_id++; + } while(cache_type>0); } - - return l1*1024; - #else - return -1; + else if(abcd[1]==AuthenticAMD[0] && abcd[2]==AuthenticAMD[1] && abcd[3]==AuthenticAMD[2]) + { + // use AMD's cpuid API + EIGEN_CPUID(abcd,0x80000005,0); + l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB + EIGEN_CPUID(abcd,0x80000006,0); + l2 = (abcd[2] >> 16) * 1024; // C[31:16] = L2 cache size in KB + l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31:18] = L3 cache size in 512KB units + } + // TODO support other vendors #endif } /** \internal - * \returns the size in Bytes of the L2 or L3 cache if this later is present */ -inline std::ptrdiff_t ei_queryTopLevelCacheSize() + * \returns the size in Bytes of the L1 data cache */ +inline int ei_queryL1CacheSize() { - #ifdef EIGEN_CPUID - int abcd[4]; - EIGEN_CPUID(abcd,0x80000006); - std::ptrdiff_t l2 = std::ptrdiff_t(abcd[2] >> 16) * 1024; - std::ptrdiff_t l3 = std::ptrdiff_t((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; + int l1(-1), l2, l3; + ei_queryCacheSizes(l1,l2,l3); + return l1; +} + +/** \internal + * \returns the size in Bytes of the L2 or L3 cache if the latter is present */ +inline int ei_queryTopLevelCacheSize() +{ + int l1, l2(-1), l3(-1); + ei_queryCacheSizes(l1,l2,l3); return std::max(l2,l3); - #else - return -1; - #endif } #endif // EIGEN_MEMORY_H