Skip to content

Commit f6622ac

Browse files
authored
Detect when AVX is disabled via OSXSAVE (#1182)
**Issue #:** internal V1647663988 [SEV-SNP enabled EC2 instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/sev-snp.html) crash when using AWS CLI to upload with checksum algorithm CRC64NVME. [AWS CLI v2](https://github.com/aws/aws-cli/tree/v2) uses aws-c-common under the hood to do CRC64NVME. **Investigation:** aws-c-common has checks for AVX support, using [CPUID](https://en.wikipedia.org/wiki/CPUID). But apparently, just because CPUID reports that it can do AVX, doesn't necessarily mean it's enabled by the OS. On SEV-SNP enabled instances, AVX is not allowed. GCC had the same bug until 7.4: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100 Stackoverflow question: https://stackoverflow.com/questions/72522885/are-the-xgetbv-and-cpuid-checks-sufficient-to-guarantee-avx2-support **Description of changes:** * Do further checks before deciding that AVX can be used. * Move all Intel feature detection into 1 function. * This is simpler, it's how we're doing feature detection on some other architectures. * Add checks to find the max value that CPUID accepts. We weren't doing this before.
1 parent 0c79778 commit f6622ac

File tree

3 files changed

+84
-102
lines changed

3 files changed

+84
-102
lines changed

source/arch/intel/asm/cpuid.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,14 @@ void aws_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd) {
2727
abcd[2] = ecx;
2828
abcd[3] = edx;
2929
}
30+
31+
uint64_t aws_run_xgetbv(uint32_t xcr) {
32+
/* NOTE: we could have used the _xgetbv() intrinsic in <immintrin.h>, but it's missing from GCC < 9.0:
33+
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 */
34+
35+
/* xgetbv writes high and low of 64bit value to EAX:EDX */
36+
uint32_t eax;
37+
uint32_t edx;
38+
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
39+
return (((uint64_t)eax) << 32) | edx;
40+
}

source/arch/intel/cpuid.c

Lines changed: 68 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -13,57 +13,70 @@
1313
#include <stdlib.h>
1414

1515
extern void aws_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd);
16+
extern uint64_t aws_run_xgetbv(uint32_t xcr);
1617

17-
typedef bool(has_feature_fn)(void);
18+
static bool s_cpu_features[AWS_CPU_FEATURE_COUNT];
19+
static bool s_cpu_features_cached;
1820

19-
static bool s_has_clmul(void) {
21+
static void s_cache_cpu_features(void) {
22+
/***************************************************************************
23+
* First, find the max EAX value we can pass to CPUID without undefined behavior
24+
* https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=0:_Highest_Function_Parameter_and_Manufacturer_ID
25+
**************************************************************************/
2026
uint32_t abcd[4];
21-
uint32_t clmul_mask = 0x00000002;
22-
aws_run_cpuid(1, 0, abcd);
23-
24-
if ((abcd[2] & clmul_mask) != clmul_mask)
25-
return false;
26-
27-
return true;
28-
}
29-
30-
static bool s_has_sse41(void) {
31-
uint32_t abcd[4];
32-
uint32_t sse41_mask = 0x00080000;
33-
aws_run_cpuid(1, 0, abcd);
34-
35-
if ((abcd[2] & sse41_mask) != sse41_mask)
36-
return false;
37-
38-
return true;
39-
}
40-
41-
static bool s_has_sse42(void) {
42-
uint32_t abcd[4];
43-
uint32_t sse42_mask = 0x00100000;
44-
aws_run_cpuid(1, 0, abcd);
45-
46-
if ((abcd[2] & sse42_mask) != sse42_mask)
47-
return false;
48-
49-
return true;
50-
}
27+
aws_run_cpuid(0x0, 0x0, abcd);
28+
const uint32_t max_cpuid_eax_value = abcd[0]; /* max-value = EAX */
29+
30+
/**************************************************************************
31+
* CPUID(EAX=1H, ECX=0H): Processor Info and Feature Bits
32+
* https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=1:_Processor_Info_and_Feature_Bits
33+
**************************************************************************/
34+
if (0x1 > max_cpuid_eax_value) {
35+
return;
36+
}
37+
aws_run_cpuid(0x1, 0x0, abcd);
38+
s_cpu_features[AWS_CPU_FEATURE_CLMUL] = abcd[2] & (1 << 1); /* pclmulqdq = ECX[bit 1] */
39+
s_cpu_features[AWS_CPU_FEATURE_SSE_4_1] = abcd[2] & (1 << 19); /* sse4.1 = ECX[bit 19] */
40+
s_cpu_features[AWS_CPU_FEATURE_SSE_4_2] = abcd[2] & (1 << 20); /* sse4.2 = ECX[bit 20] */
41+
42+
/* NOTE: Even if the AVX flag is set, it's not necessarily usable.
43+
* We need to check that OSXSAVE is enabled, and check further capabilities via XGETBV.
44+
* GCC had the same bug until 7.4: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100 */
45+
bool avx_usable = false;
46+
bool avx512_usable = false;
47+
bool feature_osxsave = abcd[2] & (1 << 27); /* osxsave = ECX[bit 27] */
48+
if (feature_osxsave) {
49+
/* Check XCR0 (Extended Control Register 0) via XGETBV
50+
* https://en.wikipedia.org/w/index.php?title=Control_register&oldid=1268423710#XCR0_and_XSS */
51+
uint64_t xcr0 = aws_run_xgetbv(0);
52+
const uint64_t avx_mask = (1 << 1) /* SSE = XCR0[bit 1] */
53+
| (1 << 2) /* AVX = XCR0[bit 2] */;
54+
avx_usable = (xcr0 & avx_mask) == avx_mask;
55+
56+
const uint64_t avx512_mask = (1 << 5) /* OPMASK = XCR0[bit 5] */
57+
| (1 << 6) /* ZMM_Hi256 = XCR0[bit 6] */
58+
| (1 << 7) /* Hi16_ZMM = XCR0[bit 7] */
59+
| avx_mask;
60+
avx512_usable = (xcr0 & avx512_mask) == avx512_mask;
61+
}
5162

52-
static bool s_has_avx2(void) {
53-
uint32_t abcd[4];
63+
bool feature_avx = false;
64+
if (avx_usable) {
65+
feature_avx = abcd[2] & (1 << 28); /* avx = ECX[bit 28] */
66+
}
5467

55-
/* Check AVX2:
56-
* CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1 */
57-
uint32_t avx2_mask = (1 << 5);
58-
aws_run_cpuid(7, 0, abcd);
59-
if ((abcd[1] & avx2_mask) != avx2_mask) {
60-
return false;
68+
/***************************************************************************
69+
* CPUID(EAX=7H, ECX=0H): Extended Features
70+
* https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=7,_ECX=0:_Extended_Features
71+
**************************************************************************/
72+
if (0x7 > max_cpuid_eax_value) {
73+
return;
6174
}
75+
aws_run_cpuid(0x7, 0x0, abcd);
76+
s_cpu_features[AWS_CPU_FEATURE_BMI2] = abcd[1] & (1 << 8); /* bmi2 = EBX[bit 8] */
77+
s_cpu_features[AWS_CPU_FEATURE_VPCLMULQDQ] = abcd[2] & (1 << 10); /* vpclmulqdq = ECX[bit 10] */
6278

63-
/* Also check AVX:
64-
* CPUID.(EAX=01H, ECX=0H):ECX.AVX[bit 28]==1
65-
*
66-
* NOTE: It SHOULD be impossible for a CPU to support AVX2 without supporting AVX.
79+
/* NOTE: It SHOULD be impossible for a CPU to support AVX2 without supporting AVX.
6780
* But we've received crash reports where the AVX2 feature check passed
6881
* and then an AVX instruction caused an "invalid instruction" crash.
6982
*
@@ -76,69 +89,22 @@ static bool s_has_avx2(void) {
7689
*
7790
* We don't know for sure what was up with those machines, but this extra
7891
* check should stop them from running our AVX/AVX2 code paths. */
79-
uint32_t avx1_mask = (1 << 28);
80-
aws_run_cpuid(1, 0, abcd);
81-
if ((abcd[2] & avx1_mask) != avx1_mask) {
82-
return false;
83-
}
84-
85-
return true;
86-
}
87-
88-
static bool s_has_avx512(void) {
89-
uint32_t abcd[4];
90-
91-
/* Check AVX512F:
92-
* CPUID.(EAX=07H, ECX=0H):EBX.AVX512[bit 16]==1 */
93-
uint32_t avx512_mask = (1 << 16);
94-
aws_run_cpuid(7, 0, abcd);
95-
if ((abcd[1] & avx512_mask) != avx512_mask) {
96-
return false;
97-
}
98-
99-
return true;
100-
}
101-
102-
static bool s_has_bmi2(void) {
103-
uint32_t abcd[4];
104-
105-
/* Check BMI2:
106-
* CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]==1 */
107-
uint32_t bmi2_mask = (1 << 8);
108-
aws_run_cpuid(7, 0, abcd);
109-
if ((abcd[1] & bmi2_mask) != bmi2_mask) {
110-
return false;
92+
if (feature_avx) {
93+
if (avx512_usable) {
94+
s_cpu_features[AWS_CPU_FEATURE_AVX512] = abcd[1] & (1 << 16); /* AVX-512 Foundation = EBX[bit 16] */
95+
}
11196
}
112-
113-
return true;
11497
}
11598

116-
static bool s_has_vpclmulqdq(void) {
117-
uint32_t abcd[4];
118-
/* Check VPCLMULQDQ:
119-
* CPUID.(EAX=07H, ECX=0H):ECX.VPCLMULQDQ[bit 10]==1 */
120-
uint32_t vpclmulqdq_mask = (1 << 10);
121-
aws_run_cpuid(7, 0, abcd);
122-
if ((abcd[2] & vpclmulqdq_mask) != vpclmulqdq_mask) {
123-
return false;
99+
bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {
100+
/* Look up and cache all hardware features the first time this is called */
101+
if (AWS_UNLIKELY(!s_cpu_features_cached)) {
102+
s_cache_cpu_features();
103+
s_cpu_features_cached = true;
124104
}
125-
return true;
126-
}
127105

128-
has_feature_fn *s_check_cpu_feature[AWS_CPU_FEATURE_COUNT] = {
129-
[AWS_CPU_FEATURE_CLMUL] = s_has_clmul,
130-
[AWS_CPU_FEATURE_SSE_4_1] = s_has_sse41,
131-
[AWS_CPU_FEATURE_SSE_4_2] = s_has_sse42,
132-
[AWS_CPU_FEATURE_AVX2] = s_has_avx2,
133-
[AWS_CPU_FEATURE_AVX512] = s_has_avx512,
134-
[AWS_CPU_FEATURE_BMI2] = s_has_bmi2,
135-
[AWS_CPU_FEATURE_VPCLMULQDQ] = s_has_vpclmulqdq,
136-
};
137-
138-
bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {
139-
if (s_check_cpu_feature[feature_name])
140-
return s_check_cpu_feature[feature_name]();
141-
return false;
106+
AWS_ASSERT(feature_name >= 0 && feature_name < AWS_CPU_FEATURE_COUNT);
107+
return s_cpu_features[feature_name];
142108
}
143109

144110
#define CPUID_AVAILABLE 0

source/arch/intel/msvc/cpuid.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,13 @@
55

66
#include <aws/common/cpuid.h>
77

8+
#include <immintrin.h>
89
#include <intrin.h>
910

1011
void aws_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd) {
1112
__cpuidex((int32_t *)abcd, eax, ecx);
1213
}
14+
15+
uint64_t aws_run_xgetbv(uint32_t xcr) {
16+
return _xgetbv(xcr);
17+
}

0 commit comments

Comments
 (0)