fix cache queries for non core2 CPU ;)

This commit is contained in:
Gael Guennebaud 2010-06-23 16:34:51 +02:00
parent 37dcdb1ed6
commit e1a6bad087

View File

@ -592,73 +592,87 @@ public:
# define EIGEN_CPUID(abcd,func) \ # define EIGEN_CPUID(abcd,func) \
__asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func)); __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func));
# else # else
# define EIGEN_CPUID(abcd,func) \ # define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func) ); __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
# endif # endif
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
# define EIGEN_CPUID(abcd,func) __cpuid((int*)abcd,func) # define EIGEN_CPUID(abcd,func) __cpuid((int*)abcd,func)
#endif #endif
/** \internal /** \internal
* \returns the size in Bytes of the L1 data cache */ * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */
inline std::ptrdiff_t ei_queryL1CacheSize() inline void ei_queryCacheSizes(int& l1, int& l2, int& l3)
{ {
#ifdef EIGEN_CPUID #ifdef EIGEN_CPUID
int abcd[4]; int abcd[4];
// try the direct method using extended level const char GenuineIntel_char[] = "GenuntelineI";
EIGEN_CPUID(abcd,0x80000005); const int* GenuineIntel = (int*)GenuineIntel_char;
std::ptrdiff_t l1 = std::ptrdiff_t(abcd[2] >> 24) * 1024;
const char AuthenticAMD_char[] = "AuthcAMDenti";
if(l1>0) const int* AuthenticAMD = (int*)AuthenticAMD_char;
return l1*1024;
// Step 1: identify the CPU model
// it fails, try using the standard level EIGEN_CPUID(abcd,0x0,0);
EIGEN_CPUID(abcd,0x00000002); if(abcd[1]==GenuineIntel[0] && abcd[2]==GenuineIntel[1] && abcd[3]==GenuineIntel[2])
unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
for(int i=0; i<14; ++i)
{ {
switch(bytes[i]) // use Intel's cpuid API
{ l1 = l2 = l3 = 0;
case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines int cache_id = 0;
case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines int cache_type = 0;
case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines do {
case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64) EIGEN_CPUID(abcd,0x4,cache_id);
case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64) cache_type = (abcd[0] & 0x0F) >> 0;
case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines if(cache_type==1||cache_type==3) // data or unified cache
case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines {
// 56h L0 data TLB, 4M pages, 4 ways, 16 entries int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
// 57h L0 data TLB, 4K pages, 4 ways, 16 entries int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
// 59h L0 data TLB, 4K pages, fully, 16 entries int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored int sets = (abcd[2]); // C[31:0]
case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
// 77h code L1 cache, 16 KB, 4 ways, 64 byte lines, sectored (IA-64)
// 96h data L1 TLB, 4K...256M pages, fully, 32 entries (IA-64) switch(cache_level)
default: break; {
} case 1: l1 = cache_size; break;
case 2: l2 = cache_size; break;
case 3: l3 = cache_size; break;
default: break;
}
}
cache_id++;
} while(cache_type>0);
} }
else if(abcd[1]==AuthenticAMD[0] && abcd[2]==AuthenticAMD[1] && abcd[3]==AuthenticAMD[2])
return l1*1024; {
#else // use AMD's cpuid API
return -1; EIGEN_CPUID(abcd,0x80000005,0);
l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
EIGEN_CPUID(abcd,0x80000006,0);
l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
}
// TODO support other vendors
#endif #endif
} }
/** \internal /** \internal
* \returns the size in Bytes of the L2 or L3 cache if the latter is present */ * \returns the size in Bytes of the L1 data cache */
inline std::ptrdiff_t ei_queryTopLevelCacheSize() inline int ei_queryL1CacheSize()
{ {
#ifdef EIGEN_CPUID int l1(-1), l2, l3;
int abcd[4]; ei_queryCacheSizes(l1,l2,l3);
EIGEN_CPUID(abcd,0x80000006); return l1;
std::ptrdiff_t l2 = std::ptrdiff_t(abcd[2] >> 16) * 1024; }
std::ptrdiff_t l3 = std::ptrdiff_t((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024;
/** \internal
* \returns the size in Bytes of the L2 or L3 cache if the latter is present */
inline int ei_queryTopLevelCacheSize()
{
int l1, l2(-1), l3(-1);
ei_queryCacheSizes(l1,l2,l3);
return std::max(l2,l3); return std::max(l2,l3);
#else
return -1;
#endif
} }
#endif // EIGEN_MEMORY_H #endif // EIGEN_MEMORY_H