mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Fix cache computation on old Intel CPUs which do not
support the cpuid function 0x4
This commit is contained in:
parent
5e7bd967cc
commit
f096452dfd
@ -607,7 +607,7 @@ inline bool ei_cpuid_is_vendor(int abcd[4], const char* vendor)
|
||||
return abcd[1]==((int*)(vendor))[0] && abcd[3]==((int*)(vendor))[1] && abcd[2]==((int*)(vendor))[2];
|
||||
}
|
||||
|
||||
inline void ei_queryCacheSizes_intel(int& l1, int& l2, int& l3)
|
||||
inline void ei_queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
|
||||
{
|
||||
int abcd[4];
|
||||
l1 = l2 = l3 = 0;
|
||||
@ -636,7 +636,95 @@ inline void ei_queryCacheSizes_intel(int& l1, int& l2, int& l3)
|
||||
}
|
||||
}
|
||||
cache_id++;
|
||||
} while(cache_type>0);
|
||||
} while(cache_type>0 && cache_id<16);
|
||||
}
|
||||
|
||||
inline void ei_queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
|
||||
{
|
||||
int abcd[4];
|
||||
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
||||
l1 = l2 = l3 = 0;
|
||||
EIGEN_CPUID(abcd,0x00000002,0);
|
||||
unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
|
||||
bool check_for_p2_core2 = false;
|
||||
for(int i=0; i<14; ++i)
|
||||
{
|
||||
switch(bytes[i])
|
||||
{
|
||||
case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
|
||||
case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
|
||||
case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
|
||||
case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
|
||||
case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
|
||||
case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
|
||||
case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
|
||||
case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
|
||||
case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x1A: l2 = 96; break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
|
||||
case 0x22: l3 = 512; break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
|
||||
case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x39: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x3A: l2 = 192; break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
|
||||
case 0x3B: l2 = 128; break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
|
||||
case 0x3C: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x3D: l2 = 384; break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
|
||||
case 0x3E: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x40: l2 = 0; break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
|
||||
case 0x41: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
|
||||
case 0x42: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
|
||||
case 0x43: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
|
||||
case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
|
||||
case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
|
||||
case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
|
||||
case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
|
||||
case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
|
||||
case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
|
||||
case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
|
||||
case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
|
||||
case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
|
||||
case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
|
||||
case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
|
||||
case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
|
||||
case 0x79: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x7A: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x7B: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
|
||||
case 0x7E: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
|
||||
case 0x7F: l2 = 512; break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
|
||||
case 0x80: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
|
||||
case 0x81: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
|
||||
case 0x82: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
|
||||
case 0x83: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
|
||||
case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
|
||||
case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
|
||||
case 0x86: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
|
||||
case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
|
||||
case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
|
||||
case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
|
||||
case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
|
||||
case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
|
||||
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
if(check_for_p2_core2 && l2 == l3)
|
||||
l3 = 0;
|
||||
l1 *= 1024;
|
||||
l2 *= 1024;
|
||||
l3 *= 1024;
|
||||
}
|
||||
|
||||
inline void ei_queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
|
||||
{
|
||||
if(max_std_funcs>=4)
|
||||
ei_queryCacheSizes_intel_direct(l1,l2,l3);
|
||||
else
|
||||
ei_queryCacheSizes_intel_codes(l1,l2,l3);
|
||||
}
|
||||
|
||||
inline void ei_queryCacheSizes_amd(int& l1, int& l2, int& l3)
|
||||
@ -661,14 +749,14 @@ inline void ei_queryCacheSizes(int& l1, int& l2, int& l3)
|
||||
|
||||
// identify the CPU vendor
|
||||
EIGEN_CPUID(abcd,0x0,0);
|
||||
//if(abcd[1]==GenuineIntel[0] && abcd[2]==GenuineIntel[1] && abcd[3]==GenuineIntel[2])
|
||||
int max_std_funcs = abcd[1];
|
||||
if(ei_cpuid_is_vendor(abcd,"GenuineIntel"))
|
||||
ei_queryCacheSizes_intel(l1,l2,l3);
|
||||
ei_queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
|
||||
else if(ei_cpuid_is_vendor(abcd,"AuthenticAMD") || ei_cpuid_is_vendor(abcd,"AMDisbetter!"))
|
||||
ei_queryCacheSizes_amd(l1,l2,l3);
|
||||
else
|
||||
// by default let's use Intel's API
|
||||
ei_queryCacheSizes_intel(l1,l2,l3);
|
||||
ei_queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
|
||||
|
||||
// here is the list of other vendors:
|
||||
// ||ei_cpuid_is_vendor(abcd,"VIA VIA VIA ")
|
||||
|
@ -25,11 +25,6 @@ int main()
|
||||
|
||||
#ifdef EIGEN_CPUID
|
||||
|
||||
ei_queryCacheSizes_intel(l1, l2, l3);
|
||||
cout << "Eigen's intel L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl;
|
||||
ei_queryCacheSizes_amd(l1, l2, l3);
|
||||
cout << "Eigen's amd L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl;
|
||||
|
||||
int abcd[4];
|
||||
int string[8];
|
||||
char* string_char = (char*)(string);
|
||||
@ -43,8 +38,21 @@ int main()
|
||||
cout << endl;
|
||||
cout << "vendor id = " << string_char << endl;
|
||||
cout << endl;
|
||||
int max_funcs = abcd[0];
|
||||
|
||||
ei_queryCacheSizes_intel_codes(l1, l2, l3);
|
||||
cout << "Eigen's intel codes L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl;
|
||||
if(max_funcs>=4)
|
||||
{
|
||||
ei_queryCacheSizes_intel_direct(l1, l2, l3);
|
||||
cout << "Eigen's intel direct L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl;
|
||||
}
|
||||
ei_queryCacheSizes_amd(l1, l2, l3);
|
||||
cout << "Eigen's amd L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl;
|
||||
cout << endl;
|
||||
|
||||
// dump Intel direct method
|
||||
if(max_funcs>=4)
|
||||
{
|
||||
l1 = l2 = l3 = 0;
|
||||
int cache_id = 0;
|
||||
@ -69,103 +77,14 @@ int main()
|
||||
cout << "cache[" << cache_id << "].size = " << cache_size << "\n";
|
||||
|
||||
cache_id++;
|
||||
} while(cache_type>0);
|
||||
}
|
||||
|
||||
// manual method for intel
|
||||
{
|
||||
l1 = l2 = l3 = 0;
|
||||
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
|
||||
EIGEN_CPUID(abcd,0x00000002,0);
|
||||
unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
|
||||
for(int i=0; i<14; ++i)
|
||||
{
|
||||
switch(bytes[i])
|
||||
{
|
||||
case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
|
||||
case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
|
||||
case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
|
||||
case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
|
||||
case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
|
||||
case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
|
||||
case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
|
||||
// 56h L0 data TLB, 4M pages, 4 ways, 16 entries
|
||||
// 57h L0 data TLB, 4K pages, 4 ways, 16 entries
|
||||
// 59h L0 data TLB, 4K pages, fully, 16 entries
|
||||
case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
|
||||
case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
|
||||
// 77h code L1 cache, 16 KB, 4 ways, 64 byte lines, sectored (IA-64)
|
||||
// 96h data L1 TLB, 4K...256M pages, fully, 32 entries (IA-64)
|
||||
|
||||
|
||||
case 0x1A: l2 = 96; break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
|
||||
case 0x22: l3 = 512; break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
|
||||
case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x39: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x3A: l2 = 192; break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
|
||||
case 0x3B: l2 = 128; break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
|
||||
case 0x3C: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x3D: l2 = 384; break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
|
||||
case 0x3E: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
|
||||
case 0x40: l2 = 0; break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
|
||||
case 0x41: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
|
||||
case 0x42: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
|
||||
case 0x43: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
|
||||
case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
|
||||
case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
|
||||
case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
|
||||
case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
|
||||
case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
|
||||
case 0x49: l3 = 4096; break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or
|
||||
case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
|
||||
case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
|
||||
case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
|
||||
case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
|
||||
case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
|
||||
case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
|
||||
case 0x79: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x7A: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x7B: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
|
||||
case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
|
||||
case 0x7E: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
|
||||
case 0x7F: l2 = 512; break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
|
||||
case 0x80: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
|
||||
case 0x81: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
|
||||
case 0x82: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
|
||||
case 0x83: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
|
||||
case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
|
||||
case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
|
||||
case 0x86: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
|
||||
case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
|
||||
case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
|
||||
case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
|
||||
case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
|
||||
case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
|
||||
case 0x9B: l2 = 1024; break; // data L2 TLB, 4K...256M pages, fully, 96 entries (IA-64)
|
||||
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
cout << "tedious way l1 = " << l1 << endl;
|
||||
cout << "tedious way l2 = " << l2 << endl;
|
||||
cout << "tedious way l3 = " << l3 << endl;
|
||||
} while(cache_type>0 && cache_id<16);
|
||||
}
|
||||
|
||||
// dump everything
|
||||
std::cout << endl <<"Raw dump:" << endl;
|
||||
DUMP_CPUID(0x0);
|
||||
DUMP_CPUID(0x1);
|
||||
DUMP_CPUID(0x2);
|
||||
DUMP_CPUID(0x3);
|
||||
DUMP_CPUID(0x4);
|
||||
DUMP_CPUID(0x5);
|
||||
DUMP_CPUID(0x6);
|
||||
for(int i=0; i<max_funcs; ++i)
|
||||
DUMP_CPUID(i);
|
||||
|
||||
DUMP_CPUID(0x80000000);
|
||||
DUMP_CPUID(0x80000001);
|
||||
DUMP_CPUID(0x80000002);
|
||||
|
Loading…
x
Reference in New Issue
Block a user