1 /**
2  * Identify the characteristics of the host CPU, providing information
3  * about cache sizes and assembly optimisation hints. This module is
4  * provided primarily for assembly language programmers.
5  *
6  * References:
7  * Some of this information was extremely difficult to track down. Some of the
8  * documents below were found only in cached versions stored by search engines!
9  * This code relies on information found in:
10  *
11  * $(UL
12  * $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual,
13  *    Volume 2A: Instruction Set Reference, A-M" (2007).
14  * )
15  * $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008).
16  * )
17  * $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD
18  *    Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005).
19  * )
20  * $(LI "AMD Geode(TM) GX Processors Data Book",
21  *    Advanced Micro Devices, Publication ID 31505E, (2005).
22  * )
23  * $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000).
24  * )
25  * $(LI "Application note 106: Software Customization for the 6x86 Family",
26  *    Cyrix Corporation, Rev 1.5 (1998)
27  * )
28  * $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf))
29  * $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution",
30  *   National Semiconductor, (2002)
31  * )
32  * $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008).
33  * )
34  * $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm))
35  * $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf))
36  * $(LI "What every programmer should know about memory",
37  *    Ulrich Depper, Red Hat, Inc., (2007).
38  * )
39  * $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009).
40  *   $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm)
41  * )
42  * $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application
43  *    Note 485" (2009).
44  * )
45  * )
46  *
47  * Bugs: Currently only works on x86 and Itanium CPUs.
48  *      Many processors have bugs in their microcode for the CPUID instruction,
49  *      so sometimes the cache information may be incorrect.
50  *
51  * Copyright: Copyright Don Clugston 2007 - 2009.
52  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
53  * Authors:   Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
54  * Source:    $(DRUNTIMESRC core/_cpuid.d)
55  */
56 
57 module core.cpuid;
58 
59 version (GNU) version = GNU_OR_LDC;
60 version (LDC) version = GNU_OR_LDC;
61 
62 @trusted:
63 nothrow:
64 @nogc:
65 
66 // If optimizing for a particular processor, it is generally better
67 // to identify based on features rather than model. NOTE: Normally
68 // it's only worthwhile to optimise for the latest Intel and AMD CPU,
69 // with a backup for other CPUs.
70 // Pentium    -- preferPentium1()
71 // PMMX       --   + mmx()
72 // PPro       -- default
73 // PII        --   + mmx()
74 // PIII       --   + mmx() + sse()
75 // PentiumM   --   + mmx() + sse() + sse2()
76 // Pentium4   -- preferPentium4()
77 // PentiumD   --   + isX86_64()
78 // Core2      -- default + isX86_64()
79 // AMD K5     -- preferPentium1()
80 // AMD K6     --   + mmx()
81 // AMD K6-II  --   + mmx() + 3dnow()
82 // AMD K7     -- preferAthlon()
83 // AMD K8     --   + sse2()
84 // AMD K10    --   + isX86_64()
85 // Cyrix 6x86 -- preferPentium1()
86 //    6x86MX  --   + mmx()
87 
88 // GDC support uses extended inline assembly:
89 //   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html        (general information and hints)
90 //   https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html  (binding variables to registers)
91 //   https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names)
92 
93 public:
94 
95 /// Cache size and behaviour
96 struct CacheInfo
97 {
98     /// Size of the cache, in kilobytes, per CPU.
99     /// For L1 unified (data + code) caches, this size is half the physical size.
100     /// (we don't halve it for larger sizes, since normally
101     /// data size is much greater than code size for critical loops).
102     size_t size;
103     /// Number of ways of associativity, eg:
104     /// $(UL
105     /// $(LI 1 = direct mapped)
106     /// $(LI 2 = 2-way set associative)
107     /// $(LI 3 = 3-way set associative)
108     /// $(LI ubyte.max = fully associative)
109     /// )
110     ubyte associativity;
111     /// Number of bytes read into the cache when a cache miss occurs.
112     uint lineSize;
113 }
114 
115 public:
116     /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
117     // Note: When we deprecate it, we simply make it private.
118     __gshared CacheInfo[5] datacache;
119 
120 @property pure
121 {
122     /// The data caches. If there are fewer than 5 physical caches levels,
123     /// the remaining levels are set to size_t.max (== entire memory space)
124     const(CacheInfo)[5] dataCaches() { return _dataCaches; }
125 
126     /// Returns vendor string, for display purposes only.
127     /// Do NOT use this to determine features!
128     /// Note that some CPUs have programmable vendorIDs.
129     string vendor()     {return _vendor;}
130     /// Returns processor string, for display purposes only
131     string processor()  {return _processor;}
132 
133     /// Does it have an x87 FPU on-chip?
134     bool x87onChip()    {return _x87onChip;}
135     /// Is MMX supported?
136     bool mmx()          {return _mmx;}
137     /// Is SSE supported?
138     bool sse()          {return _sse;}
139     /// Is SSE2 supported?
140     bool sse2()         {return _sse2;}
141     /// Is SSE3 supported?
142     bool sse3()         {return _sse3;}
143     /// Is SSSE3 supported?
144     bool ssse3()         {return _ssse3;}
145     /// Is SSE4.1 supported?
146     bool sse41()        {return _sse41;}
147     /// Is SSE4.2 supported?
148     bool sse42()        {return _sse42;}
149     /// Is SSE4a supported?
150     bool sse4a()        {return _sse4a;}
151     /// Is AES supported
152     bool aes()          {return _aes;}
153     /// Is pclmulqdq supported
154     bool hasPclmulqdq() {return _hasPclmulqdq;}
155     /// Is rdrand supported
156     bool hasRdrand()    {return _hasRdrand;}
157     /// Is AVX supported
158     bool avx()          {return _avx;}
159     /// Is VEX-Encoded AES supported
160     bool vaes()         {return _vaes;}
161     /// Is vpclmulqdq supported
162     bool hasVpclmulqdq(){return _hasVpclmulqdq; }
163     /// Is FMA supported
164     bool fma()          {return _fma;}
165     /// Is FP16C supported
166     bool fp16c()        {return _fp16c;}
167     /// Is AVX2 supported
168     bool avx2()         {return _avx2;}
169     /// Is HLE (hardware lock elision) supported
170     bool hle()          {return _hle;}
171     /// Is RTM (restricted transactional memory) supported
172     bool rtm()          {return _rtm;}
173     /// Is AVX512F supported
174     bool avx512f()      {return _avx512f;}
175     /// Is rdseed supported
176     bool hasRdseed()    {return _hasRdseed;}
177     /// Is SHA supported
178     bool hasSha()       {return _hasSha;}
179     /// Is AMD 3DNOW supported?
180     bool amd3dnow()     {return _amd3dnow;}
181     /// Is AMD 3DNOW Ext supported?
182     bool amd3dnowExt()  {return _amd3dnowExt;}
183     /// Are AMD extensions to MMX supported?
184     bool amdMmx()       {return _amdMmx;}
185     /// Is fxsave/fxrstor supported?
186     bool hasFxsr()          {return _hasFxsr;}
187     /// Is cmov supported?
188     bool hasCmov()          {return _hasCmov;}
189     /// Is rdtsc supported?
190     bool hasRdtsc()         {return _hasRdtsc;}
191     /// Is cmpxchg8b supported?
192     bool hasCmpxchg8b()     {return _hasCmpxchg8b;}
193     /// Is cmpxchg8b supported?
194     bool hasCmpxchg16b()    {return _hasCmpxchg16b;}
195     /// Is SYSENTER/SYSEXIT supported?
196     bool hasSysEnterSysExit() {return _hasSysEnterSysExit;}
197     /// Is 3DNow prefetch supported?
198     bool has3dnowPrefetch()   {return _has3dnowPrefetch;}
199     /// Are LAHF and SAHF supported in 64-bit mode?
200     bool hasLahfSahf()        {return _hasLahfSahf;}
201     /// Is POPCNT supported?
202     bool hasPopcnt()        {return _hasPopcnt;}
203     /// Is LZCNT supported?
204     bool hasLzcnt()         {return _hasLzcnt;}
205     /// Is this an Intel64 or AMD 64?
206     bool isX86_64()         {return _isX86_64;}
207 
208     /// Is this an IA64 (Itanium) processor?
209     bool isItanium()        { return _isItanium; }
210 
211     /// Is hyperthreading supported?
212     bool hyperThreading()   { return _hyperThreading; }
213     /// Returns number of threads per CPU
214     uint threadsPerCPU()    {return _threadsPerCPU;}
215     /// Returns number of cores in CPU
216     uint coresPerCPU()      {return _coresPerCPU;}
217 
218     /// Optimisation hints for assembly code.
219     ///
220     /// For forward compatibility, the CPU is compared against different
221     /// microarchitectures. For 32-bit x86, comparisons are made against
222     /// the Intel PPro/PII/PIII/PM family.
223     ///
224     /// The major 32-bit x86 microarchitecture 'dynasties' have been:
225     ///
226     /// $(UL
227     /// $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
228     /// $(LI AMD Athlon (K7, K8, K10). )
229     /// $(LI Intel NetBurst (Pentium 4, Pentium D). )
230     /// $(LI In-order Pentium (Pentium1, PMMX, Atom) )
231     /// )
232     ///
233     /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
234     /// Cyrix, Rise) were mostly in-order.
235     ///
236     /// Some new processors do not fit into the existing categories:
237     ///
238     /// $(UL
239     /// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
240     /// $(LI Centaur Isiah = VIA Nano (family 6, model F) is an out-of-order core. )
241     /// )
242     ///
243     /// Within each dynasty, the optimisation techniques are largely
244     /// identical (eg, use instruction pairing for group 4). Major
245     /// instruction set improvements occur within each dynasty.
246 
247     /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
248     bool preferAthlon() { return _preferAthlon; }
249     /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
250     bool preferPentium4() { return _preferPentium4; }
251     /// Does this CPU perform better on Pentium I code than Pentium Pro code?
252     bool preferPentium1() { return _preferPentium1; }
253 }
254 
255 private immutable
256 {
257     /* These exist as immutables so that the query property functions can
258      * be backwards compatible with code that called them with ().
259      * Also, immutables can only be set by the static this().
260      */
261     const(CacheInfo)[5] _dataCaches;
262     string _vendor = "Browser";
263     string _processor = "wasm";
264     bool _x87onChip;
265     bool _mmx;
266     bool _sse;
267     bool _sse2;
268     bool _sse3;
269     bool _ssse3;
270     bool _sse41;
271     bool _sse42;
272     bool _sse4a;
273     bool _aes;
274     bool _hasPclmulqdq;
275     bool _hasRdrand;
276     bool _avx;
277     bool _vaes;
278     bool _hasVpclmulqdq;
279     bool _fma;
280     bool _fp16c;
281     bool _avx2;
282     bool _hle;
283     bool _rtm;
284     bool _avx512f;
285     bool _hasRdseed;
286     bool _hasSha;
287     bool _amd3dnow;
288     bool _amd3dnowExt;
289     bool _amdMmx;
290     bool _hasFxsr;
291     bool _hasCmov;
292     bool _hasRdtsc;
293     bool _hasCmpxchg8b;
294     bool _hasCmpxchg16b;
295     bool _hasSysEnterSysExit;
296     bool _has3dnowPrefetch;
297     bool _hasLahfSahf;
298     bool _hasPopcnt;
299     bool _hasLzcnt;
300     bool _isX86_64;
301     bool _isItanium;
302     bool _hyperThreading;
303     uint _threadsPerCPU;
304     uint _coresPerCPU;
305     bool _preferAthlon;
306     bool _preferPentium4;
307     bool _preferPentium1;
308 }
309 
310 __gshared:
311     // All these values are set only once, and never subsequently modified.
312 public:
313     /// $(RED Warning: This field will be turned into a property in a future release.)
314     ///
315     /// Processor type (vendor-dependent).
316     /// This should be visible ONLY for display purposes.
317     uint stepping, model, family;
318     /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
319     uint numCacheLevels = 1;
320     /// The number of cache levels in the CPU.
321     @property uint cacheLevels() { return numCacheLevels; }
322 private:
323 
324 struct CpuFeatures
325 {
326     bool probablyIntel; // true = _probably_ an Intel processor, might be faking
327     bool probablyAMD; // true = _probably_ an AMD or Hygon processor
328     string processorName;
329     char [12] vendorID = 0;
330     char [48] processorNameBuffer = 0;
331     uint features = 0;     // mmx, sse, sse2, hyperthreading, etc
332     uint miscfeatures = 0; // sse3, etc.
333     uint extfeatures = 0;  // HLE, AVX2, RTM, etc.
334     uint amdfeatures = 0;  // 3DNow!, mmxext, etc
335     uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc
336     ulong xfeatures = 0;   // XFEATURES_ENABLED_MASK
337     uint maxCores = 1;
338     uint maxThreads = 1;
339 }
340 
341 CpuFeatures cpuFeatures;
342 
343 /* Hide from the optimizer where cf (a register) is coming from, so that
344  * cf doesn't get "optimized away". The idea is to  reference
345  * the global data through cf so not so many fixups are inserted
346  * into the executable image.
347  */
348 CpuFeatures* getCpuFeatures() @nogc nothrow
349 {
350     pragma(inline, false);
351     return &cpuFeatures;
352 }
353 
354     // Note that this may indicate multi-core rather than hyperthreading.
355     @property bool hyperThreadingBit()    { return (cpuFeatures.features&HTT_BIT)!=0;}
356 
357     // feature flags CPUID1_EDX
358     enum : uint
359     {
360         FPU_BIT = 1,
361         TIMESTAMP_BIT = 1<<4, // rdtsc
362         MDSR_BIT = 1<<5,      // RDMSR/WRMSR
363         CMPXCHG8B_BIT = 1<<8,
364         SYSENTERSYSEXIT_BIT = 1<<11,
365         CMOV_BIT = 1<<15,
366         MMX_BIT = 1<<23,
367         FXSR_BIT = 1<<24,
368         SSE_BIT = 1<<25,
369         SSE2_BIT = 1<<26,
370         HTT_BIT = 1<<28,
371         IA64_BIT = 1<<30
372     }
373     // feature flags misc CPUID1_ECX
374     enum : uint
375     {
376         SSE3_BIT = 1,
377         PCLMULQDQ_BIT = 1<<1, // from AVX
378         MWAIT_BIT = 1<<3,
379         SSSE3_BIT = 1<<9,
380         FMA_BIT = 1<<12,     // from AVX
381         CMPXCHG16B_BIT = 1<<13,
382         SSE41_BIT = 1<<19,
383         SSE42_BIT = 1<<20,
384         POPCNT_BIT = 1<<23,
385         AES_BIT = 1<<25, // AES instructions from AVX
386         OSXSAVE_BIT = 1<<27, // Used for AVX
387         AVX_BIT = 1<<28,
388         FP16C_BIT = 1<<29,
389         RDRAND_BIT = 1<<30,
390     }
391     // Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX.
392     enum : uint
393     {
394         FSGSBASE_BIT = 1 << 0,
395         SGX_BIT = 1 << 2,
396         BMI1_BIT = 1 << 3,
397         HLE_BIT = 1 << 4,
398         AVX2_BIT = 1 << 5,
399         SMEP_BIT = 1 << 7,
400         BMI2_BIT = 1 << 8,
401         ERMS_BIT = 1 << 9,
402         INVPCID_BIT = 1 << 10,
403         RTM_BIT = 1 << 11,
404         AVX512F_BIT = 1 << 16,
405         AVX512DQ_BIT = 1 << 17,
406         RDSEED_BIT = 1 << 18,
407         ADX_BIT = 1 << 19,
408         AVX512IFMA_BIT = 1 << 21,
409         CLFLUSHOPT_BIT = 1 << 23,
410         CLWB_BIT = 1 << 24,
411         AVX512PF_BIT = 1 << 26,
412         AVX512ER_BIT = 1 << 27,
413         AVX512CD_BIT = 1 << 28,
414         SHA_BIT = 1 << 29,
415         AVX512BW_BIT = 1 << 30,
416         AVX512VL_BIT = 1 << 31,
417     }
418     // feature flags XFEATURES_ENABLED_MASK
419     enum : ulong
420     {
421         XF_FP_BIT  = 0x1,
422         XF_SSE_BIT = 0x2,
423         XF_YMM_BIT = 0x4,
424     }
425     // AMD feature flags CPUID80000001_EDX
426     enum : uint
427     {
428         AMD_MMX_BIT = 1<<22,
429 //      FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
430         FFXSR_BIT = 1<<25,
431         PAGE1GB_BIT = 1<<26, // support for 1GB pages
432         RDTSCP_BIT = 1<<27,
433         AMD64_BIT = 1<<29,
434         AMD_3DNOW_EXT_BIT = 1<<30,
435         AMD_3DNOW_BIT = 1<<31
436     }
437     // AMD misc feature flags CPUID80000001_ECX
438     enum : uint
439     {
440         LAHFSAHF_BIT = 1,
441         LZCNT_BIT = 1<<5,
442         SSE4A_BIT = 1<<6,
443         AMD_3DNOW_PREFETCH_BIT = 1<<8,
444     }
445 
446 
447 version (GNU_OR_LDC) {
448     version (X86)
449         enum supportedX86 = true;
450     else version (X86_64)
451         enum supportedX86 = true;
452     else
453         enum supportedX86 = false;
454 } else version (D_InlineAsm_X86) {
455     enum supportedX86 = true;
456 } else version (D_InlineAsm_X86_64) {
457     enum supportedX86 = true;
458 } else {
459     enum supportedX86 = false;
460 }
461 
462 version (WASI) {} else: // WASI/WASM doesn't support cpuid
463 
464 static if (supportedX86) {
465 // Note that this code will also work for Itanium in x86 mode.
466 
467 __gshared uint max_cpuid, max_extended_cpuid;
468 
469 // CPUID2: "cache and tlb information"
470 void getcacheinfoCPUID2()
471 {
472     // We are only interested in the data caches
473     void decipherCpuid2(ubyte x) @nogc nothrow {
474         if (x==0) return;
475         // Values from http://www.sandpile.org/ia32/cpuid.htm.
476         // Includes Itanium and non-Intel CPUs.
477         //
478         static immutable ubyte [63] ids = [
479             0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
480             // level 2 cache
481             0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
482             0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
483             0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
484             // level 3 cache
485             0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,
486 
487             0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
488             0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
489         ];
490         static immutable uint [63] sizes = [
491             8, 16, 16, 64, 16, 24, 8, 16, 32,
492             128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
493             256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
494             128, 192, 128, 256, 384, 512, 3072, 512, 128,
495             512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,
496 
497             512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
498             2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
499         ];
500     // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
501         static immutable ubyte [63] ways = [
502             2, 4, 4, 8, 8, 6, 4, 4, 4,
503             4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
504             8, 8, 8, 8, 4, 8, 16, 24,
505             4, 6, 2, 4, 6, 4, 12, 8, 8,
506             4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
507             4, 4, 4, 8, 8, 8, 12, 12, 12,
508             16, 16, 16, 24, 24, 24
509         ];
510         enum { FIRSTDATA2 = 8, FIRSTDATA3 = 28+9 }
511         for (size_t i=0; i< ids.length; ++i) {
512             if (x==ids[i]) {
513                 int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
514                 if (x==0x49 && family==0xF && model==0x6) level=2;
515                 datacache[level].size=sizes[i];
516                 datacache[level].associativity=ways[i];
517                 if (level == 3 || x==0x2C || x==0x0D || (x>=0x48 && x<=0x80)
518                                    || x==0x86 || x==0x87
519                                    || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
520                     datacache[level].lineSize = 64;
521                 } else datacache[level].lineSize = 32;
522             }
523         }
524     }
525 
526     uint[4] a;
527     bool firstTime = true;
528     // On a multi-core system, this could theoretically fail, but it's only used
529     // for old single-core CPUs.
530     uint numinfos = 1;
531     do {
532         version (GNU_OR_LDC) asm pure nothrow @nogc {
533             "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2);
534         } else asm pure nothrow @nogc {
535             mov EAX, 2;
536             cpuid;
537             mov a+0, EAX;
538             mov a+4, EBX;
539             mov a+8, ECX;
540             mov a+12, EDX;
541         }
542         if (firstTime) {
543             if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
544         // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
545         // These are NOT standard Intel values
546         // (TLB = 32 entry, 4 way associative, 4K pages)
547         // (L1 cache = 16K, 4way, linesize16)
548                 datacache[0].size=8;
549                 datacache[0].associativity=4;
550                 datacache[0].lineSize=16;
551                 return;
552             }
553             // lsb of a is how many times to loop.
554             numinfos = a[0] & 0xFF;
555             // and otherwise it should be ignored
556             a[0] &= 0xFFFF_FF00;
557             firstTime = false;
558         }
559         for (int c=0; c<4;++c) {
560             // high bit set == no info.
561             if (a[c] & 0x8000_0000) continue;
562             decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
563             decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
564             decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
565             decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
566         }
567     } while (--numinfos);
568 }
569 
570 // CPUID4: "Deterministic cache parameters" leaf
571 void getcacheinfoCPUID4()
572 {
573     int cachenum = 0;
574     for (;;) {
575         uint a, b, number_of_sets;
576         version (GNU_OR_LDC) asm pure nothrow @nogc {
577             "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx";
578         } else asm pure nothrow @nogc {
579             mov EAX, 4;
580             mov ECX, cachenum;
581             cpuid;
582             mov a, EAX;
583             mov b, EBX;
584             mov number_of_sets, ECX;
585         }
586         ++cachenum;
587         if ((a&0x1F)==0) break; // no more caches
588         immutable uint numthreads = ((a>>14) & 0xFFF)  + 1;
589         immutable uint numcores = ((a>>26) & 0x3F) + 1;
590         if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
591         if ((a&0x1F)!=1 && ((a&0x1F)!=3)) continue; // we only want data & unified caches
592 
593         ++number_of_sets;
594         immutable ubyte level = cast(ubyte)(((a>>5)&7)-1);
595         if (level > datacache.length) continue; // ignore deep caches
596         datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
597         datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
598         immutable uint line_partitions = ((b >> 12)& 0x3FF) + 1;
599         // Size = number of sets * associativity * cachelinesize * linepartitions
600         // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
601         immutable ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets *
602             datacache[level].associativity : number_of_sets;
603         datacache[level].size = cast(size_t)(
604                 (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
605         if (level == 0 && (a&0xF)==3) {
606             // Halve the size for unified L1 caches
607             datacache[level].size/=2;
608         }
609     }
610 }
611 
612 // CPUID8000_0005 & 6
613 void getAMDcacheinfo()
614 {
615     uint dummy, c5, c6, d6;
616     version (GNU_OR_LDC) asm pure nothrow @nogc {
617         "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx";
618     } else asm pure nothrow @nogc {
619         mov EAX, 0x8000_0005; // L1 cache
620         cpuid;
621         // EAX has L1_TLB_4M.
622         // EBX has L1_TLB_4K
623         // EDX has L1 instruction cache
624         mov c5, ECX;
625     }
626 
627     datacache[0].size = ( (c5>>24) & 0xFF);
628     datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
629     datacache[0].lineSize = c5 & 0xFF;
630 
631     if (max_extended_cpuid >= 0x8000_0006) {
632         // AMD K6-III or K6-2+ or later.
633         uint numcores = 1;
634         if (max_extended_cpuid >= 0x8000_0008) {
635             // read the number of physical cores (minus 1) from the 8 lowest ECX bits
636             version (GNU_OR_LDC) asm pure nothrow @nogc {
637                 "cpuid" : "=a" (dummy), "=c" (numcores) : "a" (0x8000_0008) : "ebx", "edx";
638             } else asm pure nothrow @nogc {
639                 mov EAX, 0x8000_0008;
640                 cpuid;
641                 mov numcores, ECX;
642             }
643             numcores = (numcores & 0xFF) + 1;
644             if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
645         }
646 
647         version (GNU_OR_LDC) asm pure nothrow @nogc {
648             "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx";
649         } else asm pure nothrow @nogc {
650             mov EAX, 0x8000_0006; // L2/L3 cache
651             cpuid;
652             mov c6, ECX; // L2 cache info
653             mov d6, EDX; // L3 cache info
654         }
655 
656         static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
657         datacache[1].size = (c6>>16) & 0xFFFF;
658         datacache[1].associativity = assocmap[(c6>>12)&0xF];
659         datacache[1].lineSize = c6 & 0xFF;
660 
661         // The L3 cache value is TOTAL, not per core.
662         datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
663         datacache[2].associativity = assocmap[(d6>>12)&0xF];
664         datacache[2].lineSize = d6 & 0xFF;
665     }
666 }
667 
668 // For Intel CoreI7 and later, use function 0x0B
669 // to determine number of processors.
670 void getCpuInfo0B()
671 {
672     int threadsPerCore;
673     uint a, b, c, d;
674     // I'm not sure about this. The docs state that there
675     // are 2 hyperthreads per core if HT is factory enabled.
676     for (int level = 0; level < 2; level++)
677     {
678         version (GNU_OR_LDC) asm pure nothrow @nogc {
679             "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level);
680         } else asm pure nothrow @nogc {
681             mov EAX, 0x0B;
682             mov ECX, level;
683             cpuid;
684             mov a, EAX;
685             mov b, EBX;
686             mov c, ECX;
687             mov d, EDX;
688         }
689         if (b != 0)
690         {
691             if (level == 0)
692                 threadsPerCore = b & 0xFFFF;
693             else if (level == 1)
694             {
695                 cpuFeatures.maxThreads = b & 0xFFFF;
696                 cpuFeatures.maxCores = cpuFeatures.maxThreads / threadsPerCore;
697             }
698         }
699         // Got "invalid domain" returned from cpuid
700         if (a == 0 && b == 0)
701             break;
702     }
703 }
704 
705 void cpuidX86()
706 {
707     auto cf = getCpuFeatures();
708 
709     uint a, b, c, d;
710     uint* venptr = cast(uint*)cf.vendorID.ptr;
711     version (GNU_OR_LDC)
712     {
713         asm pure nothrow @nogc {
714             "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0);
715             "cpuid" : "=a" (max_extended_cpuid) : "a" (0x8000_0000) : "ebx", "ecx", "edx";
716         }
717     }
718     else
719     {
720         uint a2;
721         version (D_InlineAsm_X86)
722         {
723             asm pure nothrow @nogc {
724                 mov EAX, 0;
725                 cpuid;
726                 mov a, EAX;
727                 mov EAX, venptr;
728                 mov [EAX], EBX;
729                 mov [EAX + 4], EDX;
730                 mov [EAX + 8], ECX;
731             }
732         }
733         else version (D_InlineAsm_X86_64)
734         {
735             asm pure nothrow @nogc {
736                 mov EAX, 0;
737                 cpuid;
738                 mov a, EAX;
739                 mov RAX, venptr;
740                 mov [RAX], EBX;
741                 mov [RAX + 4], EDX;
742                 mov [RAX + 8], ECX;
743             }
744         }
745         asm pure nothrow @nogc {
746             mov EAX, 0x8000_0000;
747             cpuid;
748             mov a2, EAX;
749         }
750         max_cpuid = a;
751         max_extended_cpuid = a2;
752     }
753 
754 
755     cf.probablyIntel = cf.vendorID == "GenuineIntel";
756     cf.probablyAMD = (cf.vendorID == "AuthenticAMD" || cf.vendorID == "HygonGenuine");
757     uint apic = 0; // brand index, apic id
758     version (GNU_OR_LDC) asm pure nothrow @nogc {
759         "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1);
760     } else {
761         asm pure nothrow @nogc {
762             mov EAX, 1; // model, stepping
763             cpuid;
764             mov a, EAX;
765             mov apic, EBX;
766             mov c, ECX;
767             mov d, EDX;
768         }
769         cf.features = d;
770         cf.miscfeatures = c;
771     }
772     stepping = a & 0xF;
773     immutable uint fbase = (a >> 8) & 0xF;
774     immutable uint mbase = (a >> 4) & 0xF;
775     family = ((fbase == 0xF) || (fbase == 0)) ? fbase + (a >> 20) & 0xFF : fbase;
776     model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ?
777          mbase + ((a >> 12) & 0xF0) : mbase;
778 
779     if (max_cpuid >= 7)
780     {
781         version (GNU_OR_LDC) asm pure nothrow @nogc {
782             "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx";
783         } else {
784             uint ext;
785             asm pure nothrow @nogc {
786                 mov EAX, 7; // Structured extended feature leaf.
787                 mov ECX, 0; // Main leaf.
788                 cpuid;
789                 mov ext, EBX; // HLE, AVX2, RTM, etc.
790             }
791             cf.extfeatures = ext;
792         }
793     }
794 
795     if (cf.miscfeatures & OSXSAVE_BIT)
796     {
797         version (GNU_OR_LDC) asm pure nothrow @nogc {
798             /* Old assemblers do not recognize xgetbv, and there is no easy way
799              * to conditionally compile based on the assembler used, so use the
800              * raw .byte sequence instead.  */
801             ".byte 0x0f, 0x01, 0xd0" : "=a" (a), "=d" (d) : "c" (0);
802         } else asm pure nothrow @nogc {
803             mov ECX, 0;
804             xgetbv;
805             mov d, EDX;
806             mov a, EAX;
807         }
808         cf.xfeatures = cast(ulong)d << 32 | a;
809     }
810 
811     cf.amdfeatures = 0;
812     cf.amdmiscfeatures = 0;
813     if (max_extended_cpuid >= 0x8000_0001) {
814         version (GNU_OR_LDC) asm pure nothrow @nogc {
815             "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx";
816         } else {
817             asm pure nothrow @nogc {
818                 mov EAX, 0x8000_0001;
819                 cpuid;
820                 mov c, ECX;
821                 mov d, EDX;
822             }
823             cf.amdmiscfeatures = c;
824             cf.amdfeatures = d;
825         }
826     }
827     // Try to detect fraudulent vendorIDs
828     if (amd3dnow) cf.probablyIntel = false;
829 
830     if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) {
831         //http://support.amd.com/TechDocs/25481.pdf pg.36
832         cf.maxCores = 1;
833         if (hyperThreadingBit) {
834             // determine max number of cores for AMD
835             version (GNU_OR_LDC) asm pure nothrow @nogc {
836                 "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx";
837             } else asm pure nothrow @nogc {
838                 mov EAX, 0x8000_0008;
839                 cpuid;
840                 mov c, ECX;
841             }
842             cf.maxCores += c & 0xFF;
843         }
844     }
845 
846     if (max_extended_cpuid >= 0x8000_0004) {
847         uint* pnb = cast(uint*)cf.processorNameBuffer.ptr;
848         version (GNU_OR_LDC)
849         {
850             asm pure nothrow @nogc {
851                 "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002);
852                 "cpuid" : "=a" (pnb[4]), "=b" (pnb[5]), "=c" (pnb[ 6]), "=d" (pnb[ 7]) : "a" (0x8000_0003);
853                 "cpuid" : "=a" (pnb[8]), "=b" (pnb[9]), "=c" (pnb[10]), "=d" (pnb[11]) : "a" (0x8000_0004);
854             }
855         }
856         else version (D_InlineAsm_X86)
857         {
858             asm pure nothrow @nogc {
859                 push ESI;
860                 mov ESI, pnb;
861                 mov EAX, 0x8000_0002;
862                 cpuid;
863                 mov [ESI], EAX;
864                 mov [ESI+4], EBX;
865                 mov [ESI+8], ECX;
866                 mov [ESI+12], EDX;
867                 mov EAX, 0x8000_0003;
868                 cpuid;
869                 mov [ESI+16], EAX;
870                 mov [ESI+20], EBX;
871                 mov [ESI+24], ECX;
872                 mov [ESI+28], EDX;
873                 mov EAX, 0x8000_0004;
874                 cpuid;
875                 mov [ESI+32], EAX;
876                 mov [ESI+36], EBX;
877                 mov [ESI+40], ECX;
878                 mov [ESI+44], EDX;
879                 pop ESI;
880             }
881         }
882         else version (D_InlineAsm_X86_64)
883         {
884             asm pure nothrow @nogc {
885                 push RSI;
886                 mov RSI, pnb;
887                 mov EAX, 0x8000_0002;
888                 cpuid;
889                 mov [RSI], EAX;
890                 mov [RSI+4], EBX;
891                 mov [RSI+8], ECX;
892                 mov [RSI+12], EDX;
893                 mov EAX, 0x8000_0003;
894                 cpuid;
895                 mov [RSI+16], EAX;
896                 mov [RSI+20], EBX;
897                 mov [RSI+24], ECX;
898                 mov [RSI+28], EDX;
899                 mov EAX, 0x8000_0004;
900                 cpuid;
901                 mov [RSI+32], EAX;
902                 mov [RSI+36], EBX;
903                 mov [RSI+40], ECX;
904                 mov [RSI+44], EDX;
905                 pop RSI;
906             }
907         }
908         // Intel P4 and PM pad at front with spaces.
909         // Other CPUs pad at end with nulls.
910         int start = 0, end = 0;
911         while (cf.processorNameBuffer[start] == ' ') { ++start; }
912         while (cf.processorNameBuffer[cf.processorNameBuffer.length-end-1] == 0) { ++end; }
913         cf.processorName = cast(string)(cf.processorNameBuffer[start..$-end]);
914     } else {
915         cf.processorName = "Unknown CPU";
916     }
917     // Determine cache sizes
918 
919     // Intel docs specify that they return 0 for 0x8000_0005.
920     // AMD docs do not specify the behaviour for 0004 and 0002.
921     // Centaur/VIA and most other manufacturers use the AMD method,
922     // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
923     // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
924     // for CPUID80000005. But Geode GX uses the AMD method
925 
926     // Deal with Geode GX1 - make it same as MediaGX MMX.
927     if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
928         max_extended_cpuid = 0x8000_0004;
929     }
930     // Therefore, we try the AMD method unless it's an Intel chip.
931     // If we still have no info, try the Intel methods.
932     datacache[0].size = 0;
933     if (max_cpuid<2 || !cf.probablyIntel) {
934         if (max_extended_cpuid >= 0x8000_0005) {
935             getAMDcacheinfo();
936         } else if (cf.probablyAMD) {
937             // According to AMDProcRecognitionAppNote, this means CPU
938             // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
939             // Am5x86 has 16Kb 4-way unified data & code cache.
940             datacache[0].size = 8;
941             datacache[0].associativity = 4;
942             datacache[0].lineSize = 32;
943         } else {
944             // Some obscure CPU.
945             // Values for Cyrix 6x86MX (family 6, model 0)
946             datacache[0].size = 64;
947             datacache[0].associativity = 4;
948             datacache[0].lineSize = 32;
949         }
950     }
951     if ((datacache[0].size == 0) && max_cpuid>=4) {
952         getcacheinfoCPUID4();
953     }
954     if ((datacache[0].size == 0) && max_cpuid>=2) {
955         getcacheinfoCPUID2();
956     }
957     if (datacache[0].size == 0) {
958         // Pentium, PMMX, late model 486, or an obscure CPU
959         if (mmx) { // Pentium MMX. Also has 8kB code cache.
960             datacache[0].size = 16;
961             datacache[0].associativity = 4;
962             datacache[0].lineSize = 32;
963         } else { // Pentium 1 (which also has 8kB code cache)
964                  // or 486.
965             // Cyrix 6x86: 16, 4way, 32 linesize
966             datacache[0].size = 8;
967             datacache[0].associativity = 2;
968             datacache[0].lineSize = 32;
969         }
970     }
971     if (cf.probablyIntel && max_cpuid >= 0x0B) {
972         // For Intel i7 and later, use function 0x0B to determine
973         // cores and hyperthreads.
974         getCpuInfo0B();
975     } else {
976         if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF;
977         else cf.maxThreads = cf.maxCores;
978 
979         if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) {
980             version (GNU_OR_LDC) asm pure nothrow @nogc {
981                 "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx";
982             } else {
983                 asm pure nothrow @nogc {
984                     mov EAX, 0x8000_001e;
985                     cpuid;
986                     mov b, EBX;
987                 }
988             }
989             ubyte coresPerComputeUnit = ((b >> 8) & 3) + 1;
990             cf.maxCores = cf.maxThreads / coresPerComputeUnit;
991         }
992     }
993 }
994 
995 // Return true if the cpuid instruction is supported.
996 // BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
997 bool hasCPUID()
998 {
999     version (X86_64)
1000         return true;
1001     else
1002     {
1003         uint flags;
1004         version (GNU_OR_LDC)
1005         {
1006             // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
1007             asm nothrow @nogc { "
1008                 pushfl                    # Save EFLAGS
1009                 pushfl                    # Store EFLAGS
1010                 xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS
1011                 popfl                     # Load stored EFLAGS (with ID bit inverted)
1012                 pushfl                    # Store EFLAGS again (ID bit may or may not be inverted)
1013                 popl %%eax                # eax = modified EFLAGS (ID bit may or may not be inverted)
1014                 xorl (%%esp), %%eax       # eax = whichever bits were changed
1015                 popfl                     # Restore original EFLAGS
1016                 " : "=a" (flags);
1017             }
1018         }
1019         else version (D_InlineAsm_X86)
1020         {
1021             asm nothrow @nogc {
1022                 pushfd;
1023                 pop EAX;
1024                 mov flags, EAX;
1025                 xor EAX, 0x0020_0000;
1026                 push EAX;
1027                 popfd;
1028                 pushfd;
1029                 pop EAX;
1030                 xor flags, EAX;
1031             }
1032         }
1033         return (flags & 0x0020_0000) != 0;
1034     }
1035 }
1036 
1037 } else { // supported X86
1038 
1039     bool hasCPUID() { return false; }
1040 
1041     void cpuidX86()
1042     {
1043             datacache[0].size = 8;
1044             datacache[0].associativity = 2;
1045             datacache[0].lineSize = 32;
1046     }
1047 }
1048 
1049 /*
1050 // TODO: Implement this function with OS support
1051 void cpuidPPC()
1052 {
1053     enum :int  { PPC601, PPC603, PPC603E, PPC604,
1054                  PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }
1055 
1056     // TODO:
1057     // asm { mfpvr; } returns the CPU version but unfortunately it can
1058     // only be used in kernel mode. So OS support is required.
1059     int cputype = PPC603;
1060 
1061     // 601 has a 8KB combined data & code L1 cache.
1062     uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
1063     ubyte ways[] = [8, 2,  4,  4,  4,  8,  8,  8,  8];
1064     uint L2size[]= [0, 0,  0,  0,  0,  0,  0,  256,  512];
1065     uint L3size[]= [0, 0,  0,  0,  0,  0,  0,  2048,  0];
1066 
1067     datacache[0].size = sizes[cputype];
1068     datacache[0].associativity = ways[cputype];
1069     datacache[0].lineSize = (cputype==PPCG5)? 128 :
1070         (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
1071     datacache[1].size = L2size[cputype];
1072     datacache[2].size = L3size[cputype];
1073     datacache[1].lineSize = datacache[0].lineSize;
1074     datacache[2].lineSize = datacache[0].lineSize;
1075 }
1076 
1077 // TODO: Implement this function with OS support
1078 void cpuidSparc()
1079 {
1080     // UltaSparcIIi  : L1 = 16,  2way. L2 = 512, 4 way.
1081     // UltraSparcIII : L1 = 64,  4way. L2= 4096 or 8192.
1082     // UltraSparcIIIi: L1 = 64,  4way. L2= 1024, 4 way
1083     // UltraSparcIV  : L1 = 64,  4way. L2 = 16*1024.
1084     // UltraSparcIV+ : L1 = 64,  4way. L2 = 2048, L3=32*1024.
1085     // Sparc64V      : L1 = 128, 2way. L2 = 4096 4way.
1086 }
1087 */
1088 
1089 pragma(crt_constructor) void cpuid_initialization()
1090 {
1091     auto cf = getCpuFeatures();
1092 
1093     if (hasCPUID()) {
1094         cpuidX86();
1095     } else {
1096         // it's a 386 or 486, or a Cyrix 6x86.
1097         //Probably still has an external cache.
1098     }
1099     if (datacache[0].size==0) {
1100             // Guess same as Pentium 1.
1101             datacache[0].size = 8;
1102             datacache[0].associativity = 2;
1103             datacache[0].lineSize = 32;
1104     }
1105     numCacheLevels = 1;
1106     // And now fill up all the unused levels with full memory space.
1107     for (size_t i=1; i< datacache.length; ++i) {
1108         if (datacache[i].size==0) {
1109             // Set all remaining levels of cache equal to full address space.
1110             datacache[i].size = size_t.max/1024;
1111             datacache[i].associativity = 1;
1112             datacache[i].lineSize = datacache[i-1].lineSize;
1113         }
1114         else
1115             ++numCacheLevels;
1116     }
1117 
1118     // Set the immortals
1119 
1120     _dataCaches =     datacache;
1121     _vendor =         cast(string)cf.vendorID;
1122     _processor =      cf.processorName;
1123     _x87onChip =      (cf.features&FPU_BIT)!=0;
1124     _mmx =            (cf.features&MMX_BIT)!=0;
1125     _sse =            (cf.features&SSE_BIT)!=0;
1126     _sse2 =           (cf.features&SSE2_BIT)!=0;
1127     _sse3 =           (cf.miscfeatures&SSE3_BIT)!=0;
1128     _ssse3 =          (cf.miscfeatures&SSSE3_BIT)!=0;
1129     _sse41 =          (cf.miscfeatures&SSE41_BIT)!=0;
1130     _sse42 =          (cf.miscfeatures&SSE42_BIT)!=0;
1131     _sse4a =          (cf.amdmiscfeatures&SSE4A_BIT)!=0;
1132     _aes =            (cf.miscfeatures&AES_BIT)!=0;
1133     _hasPclmulqdq =   (cf.miscfeatures&PCLMULQDQ_BIT)!=0;
1134     _hasRdrand =      (cf.miscfeatures&RDRAND_BIT)!=0;
1135 
1136     enum avx_mask = XF_SSE_BIT|XF_YMM_BIT;
1137     _avx =            (cf.xfeatures & avx_mask) == avx_mask && (cf.miscfeatures&AVX_BIT)!=0;
1138 
1139     _vaes =           avx && aes;
1140     _hasVpclmulqdq =  avx && hasPclmulqdq;
1141     _fma =            avx && (cf.miscfeatures&FMA_BIT)!=0;
1142     _fp16c =          avx && (cf.miscfeatures&FP16C_BIT)!=0;
1143     _avx2 =           avx && (cf.extfeatures & AVX2_BIT) != 0;
1144     _hle =            (cf.extfeatures & HLE_BIT) != 0;
1145     _rtm =            (cf.extfeatures & RTM_BIT) != 0;
1146     _avx512f =        (cf.extfeatures & AVX512F_BIT) != 0;
1147     _hasRdseed =      (cf.extfeatures&RDSEED_BIT)!=0;
1148     _hasSha =         (cf.extfeatures&SHA_BIT)!=0;
1149     _amd3dnow =       (cf.amdfeatures&AMD_3DNOW_BIT)!=0;
1150     _amd3dnowExt =    (cf.amdfeatures&AMD_3DNOW_EXT_BIT)!=0;
1151     _amdMmx =         (cf.amdfeatures&AMD_MMX_BIT)!=0;
1152     _hasFxsr =        (cf.features&FXSR_BIT)!=0;
1153     _hasCmov =        (cf.features&CMOV_BIT)!=0;
1154     _hasRdtsc =       (cf.features&TIMESTAMP_BIT)!=0;
1155     _hasCmpxchg8b =   (cf.features&CMPXCHG8B_BIT)!=0;
1156     _hasCmpxchg16b =  (cf.miscfeatures&CMPXCHG16B_BIT)!=0;
1157     _hasSysEnterSysExit =
1158         // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early PentiumII.
1159         // (REF: www.geoffchappell.com).
1160         (cf.probablyIntel && (family < 6 || (family==6 && (model< 3 || (model==3 && stepping<3)))))
1161             ? false
1162             : (cf.features & SYSENTERSYSEXIT_BIT)!=0;
1163     _has3dnowPrefetch = (cf.amdmiscfeatures&AMD_3DNOW_PREFETCH_BIT)!=0;
1164     _hasLahfSahf =    (cf.amdmiscfeatures&LAHFSAHF_BIT)!=0;
1165     _hasPopcnt =      (cf.miscfeatures&POPCNT_BIT)!=0;
1166     _hasLzcnt =       (cf.amdmiscfeatures&LZCNT_BIT)!=0;
1167     _isX86_64 =       (cf.amdfeatures&AMD64_BIT)!=0;
1168     _isItanium =      (cf.features&IA64_BIT)!=0;
1169     _hyperThreading = cf.maxThreads>cf.maxCores;
1170     _threadsPerCPU =  cf.maxThreads;
1171     _coresPerCPU =    cf.maxCores;
1172     _preferAthlon =   cf.probablyAMD && family >=6;
1173     _preferPentium4 = cf.probablyIntel && family == 0xF;
1174     _preferPentium1 = family < 6 || (family==6 && model < 0xF && !cf.probablyIntel);
1175 }