1313#include <stdlib.h>
1414
1515extern void aws_run_cpuid (uint32_t eax , uint32_t ecx , uint32_t * abcd );
16+ extern uint64_t aws_run_xgetbv (uint32_t xcr );
1617
17- typedef bool (has_feature_fn )(void );
18+ static bool s_cpu_features [AWS_CPU_FEATURE_COUNT ];
19+ static bool s_cpu_features_cached ;
1820
19- static bool s_has_clmul (void ) {
21+ static void s_cache_cpu_features (void ) {
22+ /***************************************************************************
23+ * First, find the max EAX value we can pass to CPUID without undefined behavior
24+ * https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=0:_Highest_Function_Parameter_and_Manufacturer_ID
25+ **************************************************************************/
2026 uint32_t abcd [4 ];
21- uint32_t clmul_mask = 0x00000002 ;
22- aws_run_cpuid (1 , 0 , abcd );
23-
24- if ((abcd [2 ] & clmul_mask ) != clmul_mask )
25- return false;
26-
27- return true;
28- }
29-
30- static bool s_has_sse41 (void ) {
31- uint32_t abcd [4 ];
32- uint32_t sse41_mask = 0x00080000 ;
33- aws_run_cpuid (1 , 0 , abcd );
34-
35- if ((abcd [2 ] & sse41_mask ) != sse41_mask )
36- return false;
37-
38- return true;
39- }
40-
41- static bool s_has_sse42 (void ) {
42- uint32_t abcd [4 ];
43- uint32_t sse42_mask = 0x00100000 ;
44- aws_run_cpuid (1 , 0 , abcd );
45-
46- if ((abcd [2 ] & sse42_mask ) != sse42_mask )
47- return false;
48-
49- return true;
50- }
27+ aws_run_cpuid (0x0 , 0x0 , abcd );
28+ const uint32_t max_cpuid_eax_value = abcd [0 ]; /* max-value = EAX */
29+
30+ /**************************************************************************
31+ * CPUID(EAX=1H, ECX=0H): Processor Info and Feature Bits
32+ * https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=1:_Processor_Info_and_Feature_Bits
33+ **************************************************************************/
34+ if (0x1 > max_cpuid_eax_value ) {
35+ return ;
36+ }
37+ aws_run_cpuid (0x1 , 0x0 , abcd );
38+ s_cpu_features [AWS_CPU_FEATURE_CLMUL ] = abcd [2 ] & (1 << 1 ); /* pclmulqdq = ECX[bit 1] */
39+ s_cpu_features [AWS_CPU_FEATURE_SSE_4_1 ] = abcd [2 ] & (1 << 19 ); /* sse4.1 = ECX[bit 19] */
40+ s_cpu_features [AWS_CPU_FEATURE_SSE_4_2 ] = abcd [2 ] & (1 << 20 ); /* sse4.2 = ECX[bit 20] */
41+
42+ /* NOTE: Even if the AVX flag is set, it's not necessarily usable.
43+ * We need to check that OSXSAVE is enabled, and check further capabilities via XGETBV.
44+ * GCC had the same bug until 7.4: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100 */
45+ bool avx_usable = false;
46+ bool avx512_usable = false;
47+ bool feature_osxsave = abcd [2 ] & (1 << 27 ); /* osxsave = ECX[bit 27] */
48+ if (feature_osxsave ) {
49+ /* Check XCR0 (Extended Control Register 0) via XGETBV
50+ * https://en.wikipedia.org/w/index.php?title=Control_register&oldid=1268423710#XCR0_and_XSS */
51+ uint64_t xcr0 = aws_run_xgetbv (0 );
52+ const uint64_t avx_mask = (1 << 1 ) /* SSE = XCR0[bit 1] */
53+ | (1 << 2 ) /* AVX = XCR0[bit 2] */ ;
54+ avx_usable = (xcr0 & avx_mask ) == avx_mask ;
55+
56+ const uint64_t avx512_mask = (1 << 5 ) /* OPMASK = XCR0[bit 5] */
57+ | (1 << 6 ) /* ZMM_Hi256 = XCR0[bit 6] */
58+ | (1 << 7 ) /* Hi16_ZMM = XCR0[bit 7] */
59+ | avx_mask ;
60+ avx512_usable = (xcr0 & avx512_mask ) == avx512_mask ;
61+ }
5162
52- static bool s_has_avx2 (void ) {
53- uint32_t abcd [4 ];
63+ bool feature_avx = false;
64+ if (avx_usable ) {
65+ feature_avx = abcd [2 ] & (1 << 28 ); /* avx = ECX[bit 28] */
66+ }
5467
55- /* Check AVX2:
56- * CPUID. (EAX=07H , ECX=0H):EBX.AVX2[bit 5]==1 */
57- uint32_t avx2_mask = ( 1 << 5 );
58- aws_run_cpuid ( 7 , 0 , abcd );
59- if (( abcd [ 1 ] & avx2_mask ) != avx2_mask ) {
60- return false ;
68+ /***************************************************************************
69+ * CPUID(EAX=7H , ECX=0H): Extended Features
70+ * https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=7,_ECX=0:_Extended_Features
71+ **************************************************************************/
72+ if (0x7 > max_cpuid_eax_value ) {
73+ return ;
6174 }
75+ aws_run_cpuid (0x7 , 0x0 , abcd );
76+ s_cpu_features [AWS_CPU_FEATURE_BMI2 ] = abcd [1 ] & (1 << 8 ); /* bmi2 = EBX[bit 8] */
77+ s_cpu_features [AWS_CPU_FEATURE_VPCLMULQDQ ] = abcd [2 ] & (1 << 10 ); /* vpclmulqdq = ECX[bit 10] */
6278
63- /* Also check AVX:
64- * CPUID.(EAX=01H, ECX=0H):ECX.AVX[bit 28]==1
65- *
66- * NOTE: It SHOULD be impossible for a CPU to support AVX2 without supporting AVX.
79+ /* NOTE: It SHOULD be impossible for a CPU to support AVX2 without supporting AVX.
6780 * But we've received crash reports where the AVX2 feature check passed
6881 * and then an AVX instruction caused an "invalid instruction" crash.
6982 *
@@ -76,69 +89,22 @@ static bool s_has_avx2(void) {
7689 *
7790 * We don't know for sure what was up with those machines, but this extra
7891 * check should stop them from running our AVX/AVX2 code paths. */
79- uint32_t avx1_mask = (1 << 28 );
80- aws_run_cpuid (1 , 0 , abcd );
81- if ((abcd [2 ] & avx1_mask ) != avx1_mask ) {
82- return false;
83- }
84-
85- return true;
86- }
87-
88- static bool s_has_avx512 (void ) {
89- uint32_t abcd [4 ];
90-
91- /* Check AVX512F:
92- * CPUID.(EAX=07H, ECX=0H):EBX.AVX512[bit 16]==1 */
93- uint32_t avx512_mask = (1 << 16 );
94- aws_run_cpuid (7 , 0 , abcd );
95- if ((abcd [1 ] & avx512_mask ) != avx512_mask ) {
96- return false;
97- }
98-
99- return true;
100- }
101-
102- static bool s_has_bmi2 (void ) {
103- uint32_t abcd [4 ];
104-
105- /* Check BMI2:
106- * CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]==1 */
107- uint32_t bmi2_mask = (1 << 8 );
108- aws_run_cpuid (7 , 0 , abcd );
109- if ((abcd [1 ] & bmi2_mask ) != bmi2_mask ) {
110- return false;
92+ if (feature_avx ) {
93+ if (avx512_usable ) {
94+ s_cpu_features [AWS_CPU_FEATURE_AVX512 ] = abcd [1 ] & (1 << 16 ); /* AVX-512 Foundation = EBX[bit 16] */
95+ }
11196 }
112-
113- return true;
11497}
11598
116- static bool s_has_vpclmulqdq (void ) {
117- uint32_t abcd [4 ];
118- /* Check VPCLMULQDQ:
119- * CPUID.(EAX=07H, ECX=0H):ECX.VPCLMULQDQ[bit 10]==1 */
120- uint32_t vpclmulqdq_mask = (1 << 10 );
121- aws_run_cpuid (7 , 0 , abcd );
122- if ((abcd [2 ] & vpclmulqdq_mask ) != vpclmulqdq_mask ) {
123- return false;
99+ bool aws_cpu_has_feature (enum aws_cpu_feature_name feature_name ) {
100+ /* Look up and cache all hardware features the first time this is called */
101+ if (AWS_UNLIKELY (!s_cpu_features_cached )) {
102+ s_cache_cpu_features ();
103+ s_cpu_features_cached = true;
124104 }
125- return true;
126- }
127105
128- has_feature_fn * s_check_cpu_feature [AWS_CPU_FEATURE_COUNT ] = {
129- [AWS_CPU_FEATURE_CLMUL ] = s_has_clmul ,
130- [AWS_CPU_FEATURE_SSE_4_1 ] = s_has_sse41 ,
131- [AWS_CPU_FEATURE_SSE_4_2 ] = s_has_sse42 ,
132- [AWS_CPU_FEATURE_AVX2 ] = s_has_avx2 ,
133- [AWS_CPU_FEATURE_AVX512 ] = s_has_avx512 ,
134- [AWS_CPU_FEATURE_BMI2 ] = s_has_bmi2 ,
135- [AWS_CPU_FEATURE_VPCLMULQDQ ] = s_has_vpclmulqdq ,
136- };
137-
138- bool aws_cpu_has_feature (enum aws_cpu_feature_name feature_name ) {
139- if (s_check_cpu_feature [feature_name ])
140- return s_check_cpu_feature [feature_name ]();
141- return false;
106+ AWS_ASSERT (feature_name >= 0 && feature_name < AWS_CPU_FEATURE_COUNT );
107+ return s_cpu_features [feature_name ];
142108}
143109
144110#define CPUID_AVAILABLE 0
0 commit comments