/**
 * Identify the characteristics of the host CPU, providing information
 * about cache sizes and assembly optimisation hints. This module is
 * provided primarily for assembly language programmers.
 *
 * References:
 * Some of this information was extremely difficult to track down. Some of the
 * documents below were found only in cached versions stored by search engines!
 * This code relies on information found in:
 *
 * $(UL
 * $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
 *    Volume 2A: Instruction Set Reference, A-M" (2007).
 * )
 * $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
 * )
 * $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
 *    Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
 * )
 * $(LI "AMD Geode(TM) GX Processors Data Book",
 *    Advanced Micro Devices, Publication ID 31505E, (2005).
 * )
 * $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
 * )
 * $(LI "Application note 106: Software Customization for the 6x86 Family",
 *    Cyrix Corporation, Rev 1.5 (1998)
 * )
 * $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf))
 * $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
 *    National Semiconductor, (2002)
 * )
 * $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
 * )
 * $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
 * $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf))
 * $(LI "What every programmer should know about memory",
 *    Ulrich Drepper, Red Hat, Inc., (2007).
 * )
 * $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
 *   $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm)
 * )
 * $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
 *    Note 485" (2009).
 * )
 * )
 *
 * Bugs: Currently only works on x86 and Itanium CPUs.
 *      Many processors have bugs in their microcode for the CPUID instruction,
 *      so sometimes the cache information may be incorrect.
 *
 * Copyright: Copyright Don Clugston 2007 - 2009.
 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors:   Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
 * Source:    $(DRUNTIMESRC core/_cpuid.d)
 */

module core.cpuid;

// GDC and LDC share the GCC-style extended inline assembly code paths,
// so they are folded into a single version identifier.
version (GNU) version = GNU_OR_LDC;
version (LDC) version = GNU_OR_LDC;

// These attribute labels apply to every declaration that follows in the module.
@trusted:
nothrow:
@nogc:

// If optimizing for a particular processor, it is generally better
// to identify based on features rather than model. NOTE: Normally
// it's only worthwhile to optimise for the latest Intel and AMD CPU,
// with a backup for other CPUs.
// Pentium    -- preferPentium1()
// PMMX       --   + mmx()
// PPro       -- default
// PII        --   + mmx()
// PIII       --   + mmx() + sse()
// PentiumM   --   + mmx() + sse() + sse2()
// Pentium4   -- preferPentium4()
// PentiumD   --   + isX86_64()
// Core2      -- default + isX86_64()
// AMD K5     -- preferPentium1()
// AMD K6     --   + mmx()
// AMD K6-II  --   + mmx() + amd3dnow()
// AMD K7     -- preferAthlon()
// AMD K8     --   + sse2()
// AMD K10    --   + isX86_64()
// Cyrix 6x86 -- preferPentium1()
//    6x86MX  --   + mmx()

// GDC support uses extended inline assembly:
//   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html        (general information and hints)
//   https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html  (binding variables to registers)
//   https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names)

public:

/// Cache size and behaviour
struct CacheInfo
{
    /// Size of the cache, in kilobytes, per CPU.
    /// For L1 unified (data + code) caches, this size is half the physical size.
    /// (we don't halve it for larger sizes, since normally
    /// data size is much greater than code size for critical loops).
    size_t size;
    /// Number of ways of associativity, eg:
    /// $(UL
    /// $(LI 1 = direct mapped)
    /// $(LI 2 = 2-way set associative)
    /// $(LI 3 = 3-way set associative)
    /// $(LI ubyte.max = fully associative)
    /// )
    ubyte associativity;
    /// Number of bytes read into the cache when a cache miss occurs.
    uint lineSize;
}

public:
    /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
    // Note: When we deprecate it, we simply make it private.
    __gshared CacheInfo[5] datacache;

@property pure
{
    /// The data caches. If there are fewer than 5 physical cache levels,
    /// the remaining levels have their size set to size_t.max/1024
    /// (== the entire memory space, expressed in kilobytes).
    const(CacheInfo)[5] dataCaches() { return _dataCaches; }

    /// Returns vendor string, for display purposes only.
    /// Do NOT use this to determine features!
    /// Note that some CPUs have programmable vendorIDs.
    string vendor()     {return _vendor;}
    /// Returns processor string, for display purposes only
    string processor()  {return _processor;}

    /// Does it have an x87 FPU on-chip?
    bool x87onChip()    {return _x87onChip;}
    /// Is MMX supported?
    bool mmx()          {return _mmx;}
    /// Is SSE supported?
    bool sse()          {return _sse;}
    /// Is SSE2 supported?
    bool sse2()         {return _sse2;}
    /// Is SSE3 supported?
    bool sse3()         {return _sse3;}
    /// Is SSSE3 supported?
    bool ssse3()        {return _ssse3;}
    /// Is SSE4.1 supported?
    bool sse41()        {return _sse41;}
    /// Is SSE4.2 supported?
    bool sse42()        {return _sse42;}
    /// Is SSE4a supported?
    bool sse4a()        {return _sse4a;}
    /// Is AES supported?
    bool aes()          {return _aes;}
    /// Is pclmulqdq supported?
    bool hasPclmulqdq() {return _hasPclmulqdq;}
    /// Is rdrand supported?
    bool hasRdrand()    {return _hasRdrand;}
    /// Is AVX supported?
    bool avx()          {return _avx;}
    /// Is VEX-Encoded AES supported?
    bool vaes()         {return _vaes;}
    /// Is vpclmulqdq supported?
    bool hasVpclmulqdq(){return _hasVpclmulqdq; }
    /// Is FMA supported?
    bool fma()          {return _fma;}
    /// Is FP16C supported?
    bool fp16c()        {return _fp16c;}
    /// Is AVX2 supported?
    bool avx2()         {return _avx2;}
    /// Is HLE (hardware lock elision) supported?
    bool hle()          {return _hle;}
    /// Is RTM (restricted transactional memory) supported?
    bool rtm()          {return _rtm;}
    /// Is AVX512F supported?
    bool avx512f()      {return _avx512f;}
    /// Is rdseed supported?
    bool hasRdseed()    {return _hasRdseed;}
    /// Is SHA supported?
    bool hasSha()       {return _hasSha;}
    /// Is AMD 3DNOW supported?
    bool amd3dnow()     {return _amd3dnow;}
    /// Is AMD 3DNOW Ext supported?
    bool amd3dnowExt()  {return _amd3dnowExt;}
    /// Are AMD extensions to MMX supported?
    bool amdMmx()       {return _amdMmx;}
    /// Is fxsave/fxrstor supported?
    bool hasFxsr()          {return _hasFxsr;}
    /// Is cmov supported?
    bool hasCmov()          {return _hasCmov;}
    /// Is rdtsc supported?
    bool hasRdtsc()         {return _hasRdtsc;}
    /// Is cmpxchg8b supported?
    bool hasCmpxchg8b()     {return _hasCmpxchg8b;}
    /// Is cmpxchg16b supported?
    bool hasCmpxchg16b()    {return _hasCmpxchg16b;}
    /// Is SYSENTER/SYSEXIT supported?
    bool hasSysEnterSysExit() {return _hasSysEnterSysExit;}
    /// Is 3DNow prefetch supported?
    bool has3dnowPrefetch()   {return _has3dnowPrefetch;}
    /// Are LAHF and SAHF supported in 64-bit mode?
    bool hasLahfSahf()        {return _hasLahfSahf;}
    /// Is POPCNT supported?
    bool hasPopcnt()        {return _hasPopcnt;}
    /// Is LZCNT supported?
    bool hasLzcnt()         {return _hasLzcnt;}
    /// Is this an Intel64 or AMD 64?
    bool isX86_64()         {return _isX86_64;}

    /// Is this an IA64 (Itanium) processor?
    bool isItanium()        { return _isItanium; }

    /// Is hyperthreading supported?
    bool hyperThreading()   { return _hyperThreading; }
    /// Returns number of threads per CPU
    uint threadsPerCPU()    {return _threadsPerCPU;}
    /// Returns number of cores in CPU
    uint coresPerCPU()      {return _coresPerCPU;}

    /// Optimisation hints for assembly code.
    ///
    /// For forward compatibility, the CPU is compared against different
    /// microarchitectures. For 32-bit x86, comparisons are made against
    /// the Intel PPro/PII/PIII/PM family.
    ///
    /// The major 32-bit x86 microarchitecture 'dynasties' have been:
    ///
    /// $(UL
    ///  $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
    ///  $(LI AMD Athlon (K7, K8, K10). )
    ///  $(LI Intel NetBurst (Pentium 4, Pentium D). )
    ///  $(LI In-order Pentium (Pentium1, PMMX, Atom) )
    /// )
    ///
    /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
    /// Cyrix, Rise) were mostly in-order.
    ///
    /// Some new processors do not fit into the existing categories:
    ///
    /// $(UL
    /// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
    /// $(LI Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. )
    /// )
    ///
    /// Within each dynasty, the optimisation techniques are largely
    /// identical (eg, use instruction pairing for group 4). Major
    /// instruction set improvements occur within each dynasty.

    /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
    bool preferAthlon() { return _preferAthlon; }
    /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
    bool preferPentium4() { return _preferPentium4; }
    /// Does this CPU perform better on Pentium I code than Pentium Pro code?
    bool preferPentium1() { return _preferPentium1; }
}

// Backing storage for the public query properties above.
private immutable
{
    /* These exist as immutables so that the query property functions can
     * be backwards compatible with code that called them with ().
     * Also, immutables can only be set by the static this().
     */
    const(CacheInfo)[5] _dataCaches;
    // Defaults for vendor/processor. The detection code further down is
    // compiled out under version (WASI), so these are what such builds report.
    string _vendor = "Browser";
    string _processor = "wasm";
    bool _x87onChip;
    bool _mmx;
    bool _sse;
    bool _sse2;
    bool _sse3;
    bool _ssse3;
    bool _sse41;
    bool _sse42;
    bool _sse4a;
    bool _aes;
    bool _hasPclmulqdq;
    bool _hasRdrand;
    bool _avx;
    bool _vaes;
    bool _hasVpclmulqdq;
    bool _fma;
    bool _fp16c;
    bool _avx2;
    bool _hle;
    bool _rtm;
    bool _avx512f;
    bool _hasRdseed;
    bool _hasSha;
    bool _amd3dnow;
    bool _amd3dnowExt;
    bool _amdMmx;
    bool _hasFxsr;
    bool _hasCmov;
    bool _hasRdtsc;
    bool _hasCmpxchg8b;
    bool _hasCmpxchg16b;
    bool _hasSysEnterSysExit;
    bool _has3dnowPrefetch;
    bool _hasLahfSahf;
    bool _hasPopcnt;
    bool _hasLzcnt;
    bool _isX86_64;
    bool _isItanium;
    bool _hyperThreading;
    uint _threadsPerCPU;
    uint _coresPerCPU;
    bool _preferAthlon;
    bool _preferPentium4;
    bool _preferPentium1;
}

__gshared:
    // All these values are set only once, and never subsequently modified.
public:
    /// $(RED Warning: This field will be turned into a property in a future release.)
    ///
    /// Processor type (vendor-dependent).
    /// This should be visible ONLY for display purposes.
    uint stepping, model, family;
    /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
    uint numCacheLevels = 1;
    /// The number of cache levels in the CPU.
    @property uint cacheLevels() { return numCacheLevels; }
private:

// Raw results of the CPUID queries. This is filled in by cpuidX86() and its
// helpers; the public properties are derived from it afterwards.
struct CpuFeatures
{
    bool probablyIntel; // true = _probably_ an Intel processor, might be faking
    bool probablyAMD; // true = _probably_ an AMD or Hygon processor
    string processorName;   // trimmed slice of processorNameBuffer, or "Unknown CPU"
    char [12] vendorID = 0; // vendor string returned by CPUID leaf 0 (EBX, EDX, ECX)
    char [48] processorNameBuffer = 0; // brand string from CPUID 0x8000_0002..0x8000_0004
    uint features = 0;     // mmx, sse, sse2, hyperthreading, etc   (CPUID 1, EDX)
    uint miscfeatures = 0; // sse3, etc.                            (CPUID 1, ECX)
    uint extfeatures = 0;  // HLE, AVX2, RTM, etc.                  (CPUID 7 subleaf 0, EBX)
    uint amdfeatures = 0;  // 3DNow!, mmxext, etc                   (CPUID 0x8000_0001, EDX)
    uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc              (CPUID 0x8000_0001, ECX)
    ulong xfeatures = 0;   // XFEATURES_ENABLED_MASK (EDX:EAX of xgetbv with ECX = 0)
    uint maxCores = 1;     // highest core count reported by any of the queries
    uint maxThreads = 1;   // logical processors per package
}

// The single, module-wide instance.
CpuFeatures cpuFeatures;

/* Hide from the optimizer where cf (a register) is coming from, so that
 * cf doesn't get "optimized away". The idea is to reference
 * the global data through cf so not so many fixups are inserted
 * into the executable image.
 */
CpuFeatures* getCpuFeatures() @nogc nothrow
{
    pragma(inline, false); // must stay out-of-line for the trick above to work
    return &cpuFeatures;
}

    // Note that this may indicate multi-core rather than hyperthreading.
355 @property bool hyperThreadingBit() { return (cpuFeatures.features&HTT_BIT)!=0;} 356 357 // feature flags CPUID1_EDX 358 enum : uint 359 { 360 FPU_BIT = 1, 361 TIMESTAMP_BIT = 1<<4, // rdtsc 362 MDSR_BIT = 1<<5, // RDMSR/WRMSR 363 CMPXCHG8B_BIT = 1<<8, 364 SYSENTERSYSEXIT_BIT = 1<<11, 365 CMOV_BIT = 1<<15, 366 MMX_BIT = 1<<23, 367 FXSR_BIT = 1<<24, 368 SSE_BIT = 1<<25, 369 SSE2_BIT = 1<<26, 370 HTT_BIT = 1<<28, 371 IA64_BIT = 1<<30 372 } 373 // feature flags misc CPUID1_ECX 374 enum : uint 375 { 376 SSE3_BIT = 1, 377 PCLMULQDQ_BIT = 1<<1, // from AVX 378 MWAIT_BIT = 1<<3, 379 SSSE3_BIT = 1<<9, 380 FMA_BIT = 1<<12, // from AVX 381 CMPXCHG16B_BIT = 1<<13, 382 SSE41_BIT = 1<<19, 383 SSE42_BIT = 1<<20, 384 POPCNT_BIT = 1<<23, 385 AES_BIT = 1<<25, // AES instructions from AVX 386 OSXSAVE_BIT = 1<<27, // Used for AVX 387 AVX_BIT = 1<<28, 388 FP16C_BIT = 1<<29, 389 RDRAND_BIT = 1<<30, 390 } 391 // Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX. 392 enum : uint 393 { 394 FSGSBASE_BIT = 1 << 0, 395 SGX_BIT = 1 << 2, 396 BMI1_BIT = 1 << 3, 397 HLE_BIT = 1 << 4, 398 AVX2_BIT = 1 << 5, 399 SMEP_BIT = 1 << 7, 400 BMI2_BIT = 1 << 8, 401 ERMS_BIT = 1 << 9, 402 INVPCID_BIT = 1 << 10, 403 RTM_BIT = 1 << 11, 404 AVX512F_BIT = 1 << 16, 405 AVX512DQ_BIT = 1 << 17, 406 RDSEED_BIT = 1 << 18, 407 ADX_BIT = 1 << 19, 408 AVX512IFMA_BIT = 1 << 21, 409 CLFLUSHOPT_BIT = 1 << 23, 410 CLWB_BIT = 1 << 24, 411 AVX512PF_BIT = 1 << 26, 412 AVX512ER_BIT = 1 << 27, 413 AVX512CD_BIT = 1 << 28, 414 SHA_BIT = 1 << 29, 415 AVX512BW_BIT = 1 << 30, 416 AVX512VL_BIT = 1 << 31, 417 } 418 // feature flags XFEATURES_ENABLED_MASK 419 enum : ulong 420 { 421 XF_FP_BIT = 0x1, 422 XF_SSE_BIT = 0x2, 423 XF_YMM_BIT = 0x4, 424 } 425 // AMD feature flags CPUID80000001_EDX 426 enum : uint 427 { 428 AMD_MMX_BIT = 1<<22, 429 // FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions. 
430 FFXSR_BIT = 1<<25, 431 PAGE1GB_BIT = 1<<26, // support for 1GB pages 432 RDTSCP_BIT = 1<<27, 433 AMD64_BIT = 1<<29, 434 AMD_3DNOW_EXT_BIT = 1<<30, 435 AMD_3DNOW_BIT = 1<<31 436 } 437 // AMD misc feature flags CPUID80000001_ECX 438 enum : uint 439 { 440 LAHFSAHF_BIT = 1, 441 LZCNT_BIT = 1<<5, 442 SSE4A_BIT = 1<<6, 443 AMD_3DNOW_PREFETCH_BIT = 1<<8, 444 } 445 446 447 version (GNU_OR_LDC) { 448 version (X86) 449 enum supportedX86 = true; 450 else version (X86_64) 451 enum supportedX86 = true; 452 else 453 enum supportedX86 = false; 454 } else version (D_InlineAsm_X86) { 455 enum supportedX86 = true; 456 } else version (D_InlineAsm_X86_64) { 457 enum supportedX86 = true; 458 } else { 459 enum supportedX86 = false; 460 } 461 462 version (WASI) {} else: // WASI/WASM doesn't support cpuid 463 464 static if (supportedX86) { 465 // Note that this code will also work for Itanium in x86 mode. 466 467 __gshared uint max_cpuid, max_extended_cpuid; 468 469 // CPUID2: "cache and tlb information" 470 void getcacheinfoCPUID2() 471 { 472 // We are only interested in the data caches 473 void decipherCpuid2(ubyte x) @nogc nothrow { 474 if (x==0) return; 475 // Values from http://www.sandpile.org/ia32/cpuid.htm. 476 // Includes Itanium and non-Intel CPUs. 
477 // 478 static immutable ubyte [63] ids = [ 479 0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68, 480 // level 2 cache 481 0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F, 482 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E, 483 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81, 484 // level 3 cache 485 0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D, 486 487 0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE, 488 0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC 489 ]; 490 static immutable uint [63] sizes = [ 491 8, 16, 16, 64, 16, 24, 8, 16, 32, 492 128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512, 493 256, 512, 1024, 2048, 512, 1024, 4096, 6*1024, 494 128, 192, 128, 256, 384, 512, 3072, 512, 128, 495 512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024, 496 497 512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024, 498 2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024 499 ]; 500 // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative 501 static immutable ubyte [63] ways = [ 502 2, 4, 4, 8, 8, 6, 4, 4, 4, 503 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2, 504 8, 8, 8, 8, 4, 8, 16, 24, 505 4, 6, 2, 4, 6, 4, 12, 8, 8, 506 4, 8, 8, 8, 4, 8, 12, 16, 12, 16, 507 4, 4, 4, 8, 8, 8, 12, 12, 12, 508 16, 16, 16, 24, 24, 24 509 ]; 510 enum { FIRSTDATA2 = 8, FIRSTDATA3 = 28+9 } 511 for (size_t i=0; i< ids.length; ++i) { 512 if (x==ids[i]) { 513 int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 
1 : 2; 514 if (x==0x49 && family==0xF && model==0x6) level=2; 515 datacache[level].size=sizes[i]; 516 datacache[level].associativity=ways[i]; 517 if (level == 3 || x==0x2C || x==0x0D || (x>=0x48 && x<=0x80) 518 || x==0x86 || x==0x87 519 || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){ 520 datacache[level].lineSize = 64; 521 } else datacache[level].lineSize = 32; 522 } 523 } 524 } 525 526 uint[4] a; 527 bool firstTime = true; 528 // On a multi-core system, this could theoretically fail, but it's only used 529 // for old single-core CPUs. 530 uint numinfos = 1; 531 do { 532 version (GNU_OR_LDC) asm pure nothrow @nogc { 533 "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2); 534 } else asm pure nothrow @nogc { 535 mov EAX, 2; 536 cpuid; 537 mov a+0, EAX; 538 mov a+4, EBX; 539 mov a+8, ECX; 540 mov a+12, EDX; 541 } 542 if (firstTime) { 543 if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) { 544 // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080. 545 // These are NOT standard Intel values 546 // (TLB = 32 entry, 4 way associative, 4K pages) 547 // (L1 cache = 16K, 4way, linesize16) 548 datacache[0].size=8; 549 datacache[0].associativity=4; 550 datacache[0].lineSize=16; 551 return; 552 } 553 // lsb of a is how many times to loop. 554 numinfos = a[0] & 0xFF; 555 // and otherwise it should be ignored 556 a[0] &= 0xFFFF_FF00; 557 firstTime = false; 558 } 559 for (int c=0; c<4;++c) { 560 // high bit set == no info. 
561 if (a[c] & 0x8000_0000) continue; 562 decipherCpuid2(cast(ubyte)(a[c] & 0xFF)); 563 decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF)); 564 decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF)); 565 decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF)); 566 } 567 } while (--numinfos); 568 } 569 570 // CPUID4: "Deterministic cache parameters" leaf 571 void getcacheinfoCPUID4() 572 { 573 int cachenum = 0; 574 for (;;) { 575 uint a, b, number_of_sets; 576 version (GNU_OR_LDC) asm pure nothrow @nogc { 577 "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx"; 578 } else asm pure nothrow @nogc { 579 mov EAX, 4; 580 mov ECX, cachenum; 581 cpuid; 582 mov a, EAX; 583 mov b, EBX; 584 mov number_of_sets, ECX; 585 } 586 ++cachenum; 587 if ((a&0x1F)==0) break; // no more caches 588 immutable uint numthreads = ((a>>14) & 0xFFF) + 1; 589 immutable uint numcores = ((a>>26) & 0x3F) + 1; 590 if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores; 591 if ((a&0x1F)!=1 && ((a&0x1F)!=3)) continue; // we only want data & unified caches 592 593 ++number_of_sets; 594 immutable ubyte level = cast(ubyte)(((a>>5)&7)-1); 595 if (level > datacache.length) continue; // ignore deep caches 596 datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1); 597 datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size 598 immutable uint line_partitions = ((b >> 12)& 0x3FF) + 1; 599 // Size = number of sets * associativity * cachelinesize * linepartitions 600 // and must convert to Kb, also dividing by the number of hyperthreads using this cache. 601 immutable ulong sz = (datacache[level].associativity< ubyte.max)? 
number_of_sets * 602 datacache[level].associativity : number_of_sets; 603 datacache[level].size = cast(size_t)( 604 (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024)); 605 if (level == 0 && (a&0xF)==3) { 606 // Halve the size for unified L1 caches 607 datacache[level].size/=2; 608 } 609 } 610 } 611 612 // CPUID8000_0005 & 6 613 void getAMDcacheinfo() 614 { 615 uint dummy, c5, c6, d6; 616 version (GNU_OR_LDC) asm pure nothrow @nogc { 617 "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx"; 618 } else asm pure nothrow @nogc { 619 mov EAX, 0x8000_0005; // L1 cache 620 cpuid; 621 // EAX has L1_TLB_4M. 622 // EBX has L1_TLB_4K 623 // EDX has L1 instruction cache 624 mov c5, ECX; 625 } 626 627 datacache[0].size = ( (c5>>24) & 0xFF); 628 datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF); 629 datacache[0].lineSize = c5 & 0xFF; 630 631 if (max_extended_cpuid >= 0x8000_0006) { 632 // AMD K6-III or K6-2+ or later. 633 uint numcores = 1; 634 if (max_extended_cpuid >= 0x8000_0008) { 635 // read the number of physical cores (minus 1) from the 8 lowest ECX bits 636 version (GNU_OR_LDC) asm pure nothrow @nogc { 637 "cpuid" : "=a" (dummy), "=c" (numcores) : "a" (0x8000_0008) : "ebx", "edx"; 638 } else asm pure nothrow @nogc { 639 mov EAX, 0x8000_0008; 640 cpuid; 641 mov numcores, ECX; 642 } 643 numcores = (numcores & 0xFF) + 1; 644 if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores; 645 } 646 647 version (GNU_OR_LDC) asm pure nothrow @nogc { 648 "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx"; 649 } else asm pure nothrow @nogc { 650 mov EAX, 0x8000_0006; // L2/L3 cache 651 cpuid; 652 mov c6, ECX; // L2 cache info 653 mov d6, EDX; // L3 cache info 654 } 655 656 static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ]; 657 datacache[1].size = (c6>>16) & 0xFFFF; 658 datacache[1].associativity = assocmap[(c6>>12)&0xF]; 659 datacache[1].lineSize = c6 & 
0xFF; 660 661 // The L3 cache value is TOTAL, not per core. 662 datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1. 663 datacache[2].associativity = assocmap[(d6>>12)&0xF]; 664 datacache[2].lineSize = d6 & 0xFF; 665 } 666 } 667 668 // For Intel CoreI7 and later, use function 0x0B 669 // to determine number of processors. 670 void getCpuInfo0B() 671 { 672 int threadsPerCore; 673 uint a, b, c, d; 674 // I'm not sure about this. The docs state that there 675 // are 2 hyperthreads per core if HT is factory enabled. 676 for (int level = 0; level < 2; level++) 677 { 678 version (GNU_OR_LDC) asm pure nothrow @nogc { 679 "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level); 680 } else asm pure nothrow @nogc { 681 mov EAX, 0x0B; 682 mov ECX, level; 683 cpuid; 684 mov a, EAX; 685 mov b, EBX; 686 mov c, ECX; 687 mov d, EDX; 688 } 689 if (b != 0) 690 { 691 if (level == 0) 692 threadsPerCore = b & 0xFFFF; 693 else if (level == 1) 694 { 695 cpuFeatures.maxThreads = b & 0xFFFF; 696 cpuFeatures.maxCores = cpuFeatures.maxThreads / threadsPerCore; 697 } 698 } 699 // Got "invalid domain" returned from cpuid 700 if (a == 0 && b == 0) 701 break; 702 } 703 } 704 705 void cpuidX86() 706 { 707 auto cf = getCpuFeatures(); 708 709 uint a, b, c, d; 710 uint* venptr = cast(uint*)cf.vendorID.ptr; 711 version (GNU_OR_LDC) 712 { 713 asm pure nothrow @nogc { 714 "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0); 715 "cpuid" : "=a" (max_extended_cpuid) : "a" (0x8000_0000) : "ebx", "ecx", "edx"; 716 } 717 } 718 else 719 { 720 uint a2; 721 version (D_InlineAsm_X86) 722 { 723 asm pure nothrow @nogc { 724 mov EAX, 0; 725 cpuid; 726 mov a, EAX; 727 mov EAX, venptr; 728 mov [EAX], EBX; 729 mov [EAX + 4], EDX; 730 mov [EAX + 8], ECX; 731 } 732 } 733 else version (D_InlineAsm_X86_64) 734 { 735 asm pure nothrow @nogc { 736 mov EAX, 0; 737 cpuid; 738 mov a, EAX; 739 mov RAX, venptr; 740 mov [RAX], EBX; 741 mov [RAX + 4], 
EDX; 742 mov [RAX + 8], ECX; 743 } 744 } 745 asm pure nothrow @nogc { 746 mov EAX, 0x8000_0000; 747 cpuid; 748 mov a2, EAX; 749 } 750 max_cpuid = a; 751 max_extended_cpuid = a2; 752 } 753 754 755 cf.probablyIntel = cf.vendorID == "GenuineIntel"; 756 cf.probablyAMD = (cf.vendorID == "AuthenticAMD" || cf.vendorID == "HygonGenuine"); 757 uint apic = 0; // brand index, apic id 758 version (GNU_OR_LDC) asm pure nothrow @nogc { 759 "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1); 760 } else { 761 asm pure nothrow @nogc { 762 mov EAX, 1; // model, stepping 763 cpuid; 764 mov a, EAX; 765 mov apic, EBX; 766 mov c, ECX; 767 mov d, EDX; 768 } 769 cf.features = d; 770 cf.miscfeatures = c; 771 } 772 stepping = a & 0xF; 773 immutable uint fbase = (a >> 8) & 0xF; 774 immutable uint mbase = (a >> 4) & 0xF; 775 family = ((fbase == 0xF) || (fbase == 0)) ? fbase + (a >> 20) & 0xFF : fbase; 776 model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ? 777 mbase + ((a >> 12) & 0xF0) : mbase; 778 779 if (max_cpuid >= 7) 780 { 781 version (GNU_OR_LDC) asm pure nothrow @nogc { 782 "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx"; 783 } else { 784 uint ext; 785 asm pure nothrow @nogc { 786 mov EAX, 7; // Structured extended feature leaf. 787 mov ECX, 0; // Main leaf. 788 cpuid; 789 mov ext, EBX; // HLE, AVX2, RTM, etc. 790 } 791 cf.extfeatures = ext; 792 } 793 } 794 795 if (cf.miscfeatures & OSXSAVE_BIT) 796 { 797 version (GNU_OR_LDC) asm pure nothrow @nogc { 798 /* Old assemblers do not recognize xgetbv, and there is no easy way 799 * to conditionally compile based on the assembler used, so use the 800 * raw .byte sequence instead. 
*/ 801 ".byte 0x0f, 0x01, 0xd0" : "=a" (a), "=d" (d) : "c" (0); 802 } else asm pure nothrow @nogc { 803 mov ECX, 0; 804 xgetbv; 805 mov d, EDX; 806 mov a, EAX; 807 } 808 cf.xfeatures = cast(ulong)d << 32 | a; 809 } 810 811 cf.amdfeatures = 0; 812 cf.amdmiscfeatures = 0; 813 if (max_extended_cpuid >= 0x8000_0001) { 814 version (GNU_OR_LDC) asm pure nothrow @nogc { 815 "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx"; 816 } else { 817 asm pure nothrow @nogc { 818 mov EAX, 0x8000_0001; 819 cpuid; 820 mov c, ECX; 821 mov d, EDX; 822 } 823 cf.amdmiscfeatures = c; 824 cf.amdfeatures = d; 825 } 826 } 827 // Try to detect fraudulent vendorIDs 828 if (amd3dnow) cf.probablyIntel = false; 829 830 if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) { 831 //http://support.amd.com/TechDocs/25481.pdf pg.36 832 cf.maxCores = 1; 833 if (hyperThreadingBit) { 834 // determine max number of cores for AMD 835 version (GNU_OR_LDC) asm pure nothrow @nogc { 836 "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx"; 837 } else asm pure nothrow @nogc { 838 mov EAX, 0x8000_0008; 839 cpuid; 840 mov c, ECX; 841 } 842 cf.maxCores += c & 0xFF; 843 } 844 } 845 846 if (max_extended_cpuid >= 0x8000_0004) { 847 uint* pnb = cast(uint*)cf.processorNameBuffer.ptr; 848 version (GNU_OR_LDC) 849 { 850 asm pure nothrow @nogc { 851 "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002); 852 "cpuid" : "=a" (pnb[4]), "=b" (pnb[5]), "=c" (pnb[ 6]), "=d" (pnb[ 7]) : "a" (0x8000_0003); 853 "cpuid" : "=a" (pnb[8]), "=b" (pnb[9]), "=c" (pnb[10]), "=d" (pnb[11]) : "a" (0x8000_0004); 854 } 855 } 856 else version (D_InlineAsm_X86) 857 { 858 asm pure nothrow @nogc { 859 push ESI; 860 mov ESI, pnb; 861 mov EAX, 0x8000_0002; 862 cpuid; 863 mov [ESI], EAX; 864 mov [ESI+4], EBX; 865 mov [ESI+8], ECX; 866 mov [ESI+12], EDX; 867 mov EAX, 0x8000_0003; 868 cpuid; 869 mov [ESI+16], EAX; 870 mov [ESI+20], EBX; 871 mov [ESI+24], 
ECX; 872 mov [ESI+28], EDX; 873 mov EAX, 0x8000_0004; 874 cpuid; 875 mov [ESI+32], EAX; 876 mov [ESI+36], EBX; 877 mov [ESI+40], ECX; 878 mov [ESI+44], EDX; 879 pop ESI; 880 } 881 } 882 else version (D_InlineAsm_X86_64) 883 { 884 asm pure nothrow @nogc { 885 push RSI; 886 mov RSI, pnb; 887 mov EAX, 0x8000_0002; 888 cpuid; 889 mov [RSI], EAX; 890 mov [RSI+4], EBX; 891 mov [RSI+8], ECX; 892 mov [RSI+12], EDX; 893 mov EAX, 0x8000_0003; 894 cpuid; 895 mov [RSI+16], EAX; 896 mov [RSI+20], EBX; 897 mov [RSI+24], ECX; 898 mov [RSI+28], EDX; 899 mov EAX, 0x8000_0004; 900 cpuid; 901 mov [RSI+32], EAX; 902 mov [RSI+36], EBX; 903 mov [RSI+40], ECX; 904 mov [RSI+44], EDX; 905 pop RSI; 906 } 907 } 908 // Intel P4 and PM pad at front with spaces. 909 // Other CPUs pad at end with nulls. 910 int start = 0, end = 0; 911 while (cf.processorNameBuffer[start] == ' ') { ++start; } 912 while (cf.processorNameBuffer[cf.processorNameBuffer.length-end-1] == 0) { ++end; } 913 cf.processorName = cast(string)(cf.processorNameBuffer[start..$-end]); 914 } else { 915 cf.processorName = "Unknown CPU"; 916 } 917 // Determine cache sizes 918 919 // Intel docs specify that they return 0 for 0x8000_0005. 920 // AMD docs do not specify the behaviour for 0004 and 0002. 921 // Centaur/VIA and most other manufacturers use the AMD method, 922 // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2! 923 // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour 924 // for CPUID80000005. But Geode GX uses the AMD method 925 926 // Deal with Geode GX1 - make it same as MediaGX MMX. 927 if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) { 928 max_extended_cpuid = 0x8000_0004; 929 } 930 // Therefore, we try the AMD method unless it's an Intel chip. 931 // If we still have no info, try the Intel methods. 
932 datacache[0].size = 0; 933 if (max_cpuid<2 || !cf.probablyIntel) { 934 if (max_extended_cpuid >= 0x8000_0005) { 935 getAMDcacheinfo(); 936 } else if (cf.probablyAMD) { 937 // According to AMDProcRecognitionAppNote, this means CPU 938 // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4) 939 // Am5x86 has 16Kb 4-way unified data & code cache. 940 datacache[0].size = 8; 941 datacache[0].associativity = 4; 942 datacache[0].lineSize = 32; 943 } else { 944 // Some obscure CPU. 945 // Values for Cyrix 6x86MX (family 6, model 0) 946 datacache[0].size = 64; 947 datacache[0].associativity = 4; 948 datacache[0].lineSize = 32; 949 } 950 } 951 if ((datacache[0].size == 0) && max_cpuid>=4) { 952 getcacheinfoCPUID4(); 953 } 954 if ((datacache[0].size == 0) && max_cpuid>=2) { 955 getcacheinfoCPUID2(); 956 } 957 if (datacache[0].size == 0) { 958 // Pentium, PMMX, late model 486, or an obscure CPU 959 if (mmx) { // Pentium MMX. Also has 8kB code cache. 960 datacache[0].size = 16; 961 datacache[0].associativity = 4; 962 datacache[0].lineSize = 32; 963 } else { // Pentium 1 (which also has 8kB code cache) 964 // or 486. 965 // Cyrix 6x86: 16, 4way, 32 linesize 966 datacache[0].size = 8; 967 datacache[0].associativity = 2; 968 datacache[0].lineSize = 32; 969 } 970 } 971 if (cf.probablyIntel && max_cpuid >= 0x0B) { 972 // For Intel i7 and later, use function 0x0B to determine 973 // cores and hyperthreads. 
974 getCpuInfo0B(); 975 } else { 976 if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF; 977 else cf.maxThreads = cf.maxCores; 978 979 if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) { 980 version (GNU_OR_LDC) asm pure nothrow @nogc { 981 "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx"; 982 } else { 983 asm pure nothrow @nogc { 984 mov EAX, 0x8000_001e; 985 cpuid; 986 mov b, EBX; 987 } 988 } 989 ubyte coresPerComputeUnit = ((b >> 8) & 3) + 1; 990 cf.maxCores = cf.maxThreads / coresPerComputeUnit; 991 } 992 } 993 } 994 995 // Return true if the cpuid instruction is supported. 996 // BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines. 997 bool hasCPUID() 998 { 999 version (X86_64) 1000 return true; 1001 else 1002 { 1003 uint flags; 1004 version (GNU_OR_LDC) 1005 { 1006 // http://wiki.osdev.org/CPUID#Checking_CPUID_availability 1007 asm nothrow @nogc { " 1008 pushfl # Save EFLAGS 1009 pushfl # Store EFLAGS 1010 xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS 1011 popfl # Load stored EFLAGS (with ID bit inverted) 1012 pushfl # Store EFLAGS again (ID bit may or may not be inverted) 1013 popl %%eax # eax = modified EFLAGS (ID bit may or may not be inverted) 1014 xorl (%%esp), %%eax # eax = whichever bits were changed 1015 popfl # Restore original EFLAGS 1016 " : "=a" (flags); 1017 } 1018 } 1019 else version (D_InlineAsm_X86) 1020 { 1021 asm nothrow @nogc { 1022 pushfd; 1023 pop EAX; 1024 mov flags, EAX; 1025 xor EAX, 0x0020_0000; 1026 push EAX; 1027 popfd; 1028 pushfd; 1029 pop EAX; 1030 xor flags, EAX; 1031 } 1032 } 1033 return (flags & 0x0020_0000) != 0; 1034 } 1035 } 1036 1037 } else { // supported X86 1038 1039 bool hasCPUID() { return false; } 1040 1041 void cpuidX86() 1042 { 1043 datacache[0].size = 8; 1044 datacache[0].associativity = 2; 1045 datacache[0].lineSize = 32; 1046 } 1047 } 1048 1049 /* 1050 // TODO: Implement this function with OS support 1051 void cpuidPPC() 1052 { 
enum :int  { PPC601, PPC603, PPC603E, PPC604,
                 PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }

    // TODO:
    // asm { mfpvr; } returns the CPU version but unfortunately it can
    // only be used in kernel mode. So OS support is required.
    int cputype = PPC603;

    // 601 has a 8KB combined data & code L1 cache.
    uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
    ubyte ways[] = [8, 2, 4, 4, 4, 8, 8, 8, 8];
    uint L2size[]= [0, 0, 0, 0, 0, 0, 0, 256, 512];
    uint L3size[]= [0, 0, 0, 0, 0, 0, 0, 2048, 0];

    datacache[0].size = sizes[cputype];
    datacache[0].associativity = ways[cputype];
    datacache[0].lineSize = (cputype==PPCG5)? 128 :
        (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
    datacache[1].size = L2size[cputype];
    datacache[2].size = L3size[cputype];
    datacache[1].lineSize = datacache[0].lineSize;
    datacache[2].lineSize = datacache[0].lineSize;
}

// TODO: Implement this function with OS support
void cpuidSparc()
{
    // UltraSparcIIi : L1 = 16, 2way. L2 = 512, 4 way.
    // UltraSparcIII : L1 = 64, 4way. L2= 4096 or 8192.
    // UltraSparcIIIi: L1 = 64, 4way. L2= 1024, 4 way
    // UltraSparcIV  : L1 = 64, 4way. L2 = 16*1024.
    // UltraSparcIV+ : L1 = 64, 4way. L2 = 2048, L3=32*1024.
    // Sparc64V      : L1 = 128, 2way. L2 = 4096 4way.
}
*/

// Module start-up hook, registered through pragma(crt_constructor).
//
// Steps, in order:
//   1. fetch the shared CPU feature record and, when CPUID is available,
//      let cpuidX86() populate it and the cache table;
//   2. make sure level 0 of the cache table is non-empty and pad the unused
//      levels so that every entry of `datacache` is usable;
//   3. copy everything into the module-level `_xxx` variables.
pragma(crt_constructor) void cpuid_initialization()
{
    auto cf = getCpuFeatures();

    // Without CPUID it's a 386 or 486, or a Cyrix 6x86, and there is nothing
    // to query. Such a machine probably still has an external cache.
    if (hasCPUID())
        cpuidX86();

    if (datacache[0].size == 0)
    {
        // Guess same as Pentium 1.
        datacache[0].size = 8;
        datacache[0].associativity = 2;
        datacache[0].lineSize = 32;
    }

    // Level 0 always counts. Every further level that was detected adds one;
    // every level that was not detected is filled up with the full memory space.
    numCacheLevels = 1;
    foreach (level; 1 .. datacache.length)
    {
        if (datacache[level].size != 0)
        {
            ++numCacheLevels;
            continue;
        }
        // Set this undetected level of cache equal to full address space.
        datacache[level].size = size_t.max/1024;
        datacache[level].associativity = 1;
        datacache[level].lineSize = datacache[level-1].lineSize;
    }

    // Set the immortals

    _dataCaches = datacache;
    _vendor     = cast(string)cf.vendorID;
    _processor  = cf.processorName;

    // Flags taken from cf.features
    _x87onChip    = (cf.features & FPU_BIT) != 0;
    _mmx          = (cf.features & MMX_BIT) != 0;
    _sse          = (cf.features & SSE_BIT) != 0;
    _sse2         = (cf.features & SSE2_BIT) != 0;
    _hasFxsr      = (cf.features & FXSR_BIT) != 0;
    _hasCmov      = (cf.features & CMOV_BIT) != 0;
    _hasRdtsc     = (cf.features & TIMESTAMP_BIT) != 0;
    _hasCmpxchg8b = (cf.features & CMPXCHG8B_BIT) != 0;
    _isItanium    = (cf.features & IA64_BIT) != 0;

    // Flags taken from cf.miscfeatures
    _sse3          = (cf.miscfeatures & SSE3_BIT) != 0;
    _ssse3         = (cf.miscfeatures & SSSE3_BIT) != 0;
    _sse41         = (cf.miscfeatures & SSE41_BIT) != 0;
    _sse42         = (cf.miscfeatures & SSE42_BIT) != 0;
    _aes           = (cf.miscfeatures & AES_BIT) != 0;
    _hasPclmulqdq  = (cf.miscfeatures & PCLMULQDQ_BIT) != 0;
    _hasRdrand     = (cf.miscfeatures & RDRAND_BIT) != 0;
    _hasCmpxchg16b = (cf.miscfeatures & CMPXCHG16B_BIT) != 0;
    _hasPopcnt     = (cf.miscfeatures & POPCNT_BIT) != 0;

    // Flags taken from cf.extfeatures that do not depend on AVX
    _hle       = (cf.extfeatures & HLE_BIT) != 0;
    _rtm       = (cf.extfeatures & RTM_BIT) != 0;
    _avx512f   = (cf.extfeatures & AVX512F_BIT) != 0;
    _hasRdseed = (cf.extfeatures & RDSEED_BIT) != 0;
    _hasSha    = (cf.extfeatures & SHA_BIT) != 0;

    // Flags taken from cf.amdfeatures
    _amd3dnow    = (cf.amdfeatures & AMD_3DNOW_BIT) != 0;
    _amd3dnowExt = (cf.amdfeatures & AMD_3DNOW_EXT_BIT) != 0;
    _amdMmx      = (cf.amdfeatures & AMD_MMX_BIT) != 0;
    _isX86_64    = (cf.amdfeatures & AMD64_BIT) != 0;

    // Flags taken from cf.amdmiscfeatures
    _sse4a            = (cf.amdmiscfeatures & SSE4A_BIT) != 0;
    _has3dnowPrefetch = (cf.amdmiscfeatures & AMD_3DNOW_PREFETCH_BIT) != 0;
    _hasLahfSahf      = (cf.amdmiscfeatures & LAHFSAHF_BIT) != 0;
    _hasLzcnt         = (cf.amdmiscfeatures & LZCNT_BIT) != 0;

    // AVX needs both XF_SSE_BIT and XF_YMM_BIT set in cf.xfeatures as well as
    // the AVX feature bit. The assignments that follow go through the `avx`,
    // `aes` and `hasPclmulqdq` accessors, so they must stay after `_avx`,
    // `_aes` and `_hasPclmulqdq` have been written.
    enum avx_mask = XF_SSE_BIT|XF_YMM_BIT;
    _avx = (cf.xfeatures & avx_mask) == avx_mask && (cf.miscfeatures & AVX_BIT) != 0;

    _vaes          = avx && aes;
    _hasVpclmulqdq = avx && hasPclmulqdq;
    _fma           = avx && (cf.miscfeatures & FMA_BIT) != 0;
    _fp16c         = avx && (cf.miscfeatures & FP16C_BIT) != 0;
    _avx2          = avx && (cf.extfeatures & AVX2_BIT) != 0;

    // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early PentiumII.
    // (REF: www.geoffchappell.com).
    // On those parts the feature bit is ignored and the flag is forced off.
    const bool sysenterUnreliable = cf.probablyIntel
        && (family < 6 || (family==6 && (model< 3 || (model==3 && stepping<3))));
    _hasSysEnterSysExit = !sysenterUnreliable && (cf.features & SYSENTERSYSEXIT_BIT) != 0;

    // Core / thread topology
    _hyperThreading = cf.maxThreads > cf.maxCores;
    _threadsPerCPU  = cf.maxThreads;
    _coresPerCPU    = cf.maxCores;

    // Optimisation hints
    _preferAthlon   = cf.probablyAMD && family >= 6;
    _preferPentium4 = cf.probablyIntel && family == 0xF;
    _preferPentium1 = family < 6 || (family==6 && model < 0xF && !cf.probablyIntel);
}