Skip to content

Commit da3f924

Browse files
Auto merge of #144938 - tgross35:more-outline-atomics, r=<try>
[experiment] enable outline-atomics on more aarch64 platforms try-job: arm-android try-job: dist-android try-job: dist-x86_64-freebsd try-job: dist-aarch64-windows-gnullvm try-job: dist-aarch64-apple try-job: aarch64-msvc-1 try-job: aarch64-msvc-2 try-job: dist-aarch64-msvc
2 parents 0060d5a + a25e101 commit da3f924

File tree

15 files changed

+125
-19
lines changed

15 files changed

+125
-19
lines changed

compiler/rustc_target/src/spec/targets/aarch64_apple_ios.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pub(crate) fn target() -> Target {
1616
.into(),
1717
arch,
1818
options: TargetOptions {
19-
features: "+neon,+fp-armv8,+apple-a7".into(),
19+
features: "+neon,+fp-armv8,+apple-a7,+outline-atomics".into(),
2020
max_atomic_width: Some(128),
2121
supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD,
2222
..opts

compiler/rustc_target/src/spec/targets/aarch64_apple_tvos.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pub(crate) fn target() -> Target {
1616
.into(),
1717
arch,
1818
options: TargetOptions {
19-
features: "+neon,+fp-armv8,+apple-a7".into(),
19+
features: "+neon,+fp-armv8,+apple-a7,+outline-atomics".into(),
2020
max_atomic_width: Some(128),
2121
..opts
2222
},

compiler/rustc_target/src/spec/targets/aarch64_apple_visionos.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pub(crate) fn target() -> Target {
1616
.into(),
1717
arch,
1818
options: TargetOptions {
19-
features: "+neon,+fp-armv8,+apple-a16".into(),
19+
features: "+neon,+fp-armv8,+apple-a16,+outline-atomics".into(),
2020
max_atomic_width: Some(128),
2121
supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD,
2222
..opts

compiler/rustc_target/src/spec/targets/aarch64_apple_watchos.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pub(crate) fn target() -> Target {
1616
.into(),
1717
arch,
1818
options: TargetOptions {
19-
features: "+v8a,+neon,+fp-armv8,+apple-a7".into(),
19+
features: "+v8a,+neon,+fp-armv8,+apple-a7,+outline-atomics".into(),
2020
max_atomic_width: Some(128),
2121
dynamic_linking: false,
2222
position_independent_executables: true,

compiler/rustc_target/src/spec/targets/aarch64_linux_android.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ pub(crate) fn target() -> Target {
2121
max_atomic_width: Some(128),
2222
// As documented in https://developer.android.com/ndk/guides/cpu-features.html
2323
// the neon (ASIMD) and FP must exist on all android aarch64 targets.
24-
features: "+v8a,+neon,+fp-armv8".into(),
24+
features: "+v8a,+neon,+fp-armv8,+outline-atomics".into(),
2525
// the AAPCS64 expects use of non-leaf frame pointers per
2626
// https://github.com/ARM-software/abi-aa/blob/4492d1570eb70c8fd146623e0db65b2d241f12e7/aapcs64/aapcs64.rst#the-frame-pointer
2727
// and we tend to encounter interesting bugs in AArch64 unwinding code if we do not

compiler/rustc_target/src/spec/targets/aarch64_pc_windows_gnullvm.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::spec::{FramePointer, Target, TargetMetadata, base};
33
pub(crate) fn target() -> Target {
44
let mut base = base::windows_gnullvm::opts();
55
base.max_atomic_width = Some(128);
6-
base.features = "+v8a,+neon,+fp-armv8".into();
6+
base.features = "+v8a,+neon,+fp-armv8,+outline-atomics".into();
77
base.linker = Some("aarch64-w64-mingw32-clang".into());
88

99
// Microsoft recommends enabling frame pointers on Arm64 Windows.

compiler/rustc_target/src/spec/targets/aarch64_pc_windows_msvc.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::spec::{FramePointer, Target, TargetMetadata, base};
33
pub(crate) fn target() -> Target {
44
let mut base = base::windows_msvc::opts();
55
base.max_atomic_width = Some(128);
6-
base.features = "+v8a,+neon,+fp-armv8".into();
6+
base.features = "+v8a,+neon,+fp-armv8,+outline-atomics".into();
77

88
// Microsoft recommends enabling frame pointers on Arm64 Windows.
99
// From https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#integer-registers

compiler/rustc_target/src/spec/targets/aarch64_unknown_freebsd.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pub(crate) fn target() -> Target {
1313
data_layout: "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32".into(),
1414
arch: "aarch64".into(),
1515
options: TargetOptions {
16-
features: "+v8a".into(),
16+
features: "+v8a,+outline-atomics".into(),
1717
max_atomic_width: Some(128),
1818
stack_probes: StackProbeType::Inline,
1919
supported_sanitizers: SanitizerSet::ADDRESS

compiler/rustc_target/src/target_features.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,10 @@ static AARCH64_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
248248
("mte", Stable, &[]),
249249
// FEAT_AdvSimd & FEAT_FP
250250
("neon", Stable, &[]),
251+
// Backend option to turn atomic operations into an intrinsic call when `lse` is not known to be
252+
// available, so the intrinsic can do runtime LSE feature detection rather than unconditionally
253+
// using slower non-LSE operations. Unstable since it doesn't need to be user-togglable.
254+
("outline-atomics", Unstable(sym::aarch64_unstable_target_feature), &[]),
251255
// FEAT_PAUTH (address authentication)
252256
("paca", Stable, &[]),
253257
// FEAT_PAUTH (generic authentication)

library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs renamed to library/compiler-builtins/compiler-builtins/src/aarch64_outline_atomics.rs

Lines changed: 74 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
//! which is supported on the current CPU.
77
//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
88
//!
9-
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
10-
//! Use the `compiler-rt` intrinsics if you want LSE support.
11-
//!
129
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
1310
//!
1411
//! Generate functions for each of the following symbols:
@@ -24,7 +21,18 @@
2421
//! We do something similar, but with macro arguments.
2522
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
2623

27-
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
24+
use core::sync::atomic::{AtomicU8, Ordering};
25+
26+
/// Non-zero if the host supports LSE atomics (set by `__rust_enable_lse`).
27+
static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);
28+
29+
intrinsics! {
30+
/// Call to enable LSE in outline atomic operations. The caller must verify
31+
/// LSE operations are supported.
32+
pub extern "C" fn __rust_enable_lse() {
33+
HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
34+
}
35+
}
2836

2937
/// Translate a byte size to a Rust type.
3038
#[rustfmt::skip]
@@ -126,6 +134,39 @@ macro_rules! stxp {
126134
};
127135
}
128136

137+
// Check if LSE intrinsic can be used, and jump to label if not.
138+
macro_rules! jmp_if_no_lse {
139+
($label:literal) => {
140+
concat!(
141+
".arch_extension lse; ",
142+
"adrp x16, {have_lse}; ",
143+
"ldrb w16, [x16, :lo12:{have_lse}]; ",
144+
"cbz w16, ",
145+
$label,
146+
";"
147+
)
148+
};
149+
}
150+
151+
// Translate memory ordering to the LSE suffix
152+
#[rustfmt::skip]
153+
macro_rules! lse_mem_sfx {
154+
(Relaxed) => { "" };
155+
(Acquire) => { "a" };
156+
(Release) => { "l" };
157+
(AcqRel) => { "al" };
158+
}
159+
160+
// Generate the aarch64 LSE operation for memory ordering and width
161+
macro_rules! lse {
162+
($op:literal, $order:ident, 16) => {
163+
concat!($op, "p", lse_mem_sfx!($order))
164+
};
165+
($op:literal, $order:ident, $bytes:tt) => {
166+
concat!($op, lse_mem_sfx!($order), size!($bytes))
167+
};
168+
}
169+
129170
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
130171
macro_rules! compare_and_swap {
131172
($ordering:ident, $bytes:tt, $name:ident) => {
@@ -137,6 +178,11 @@ macro_rules! compare_and_swap {
137178
) -> int_ty!($bytes) {
138179
// We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
139180
core::arch::naked_asm! {
181+
jmp_if_no_lse!("8f"),
182+
// CAS s(0), s(1), [x2]
183+
concat!(lse!("cas", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 1), ", [x2]"),
184+
"ret",
185+
"8:",
140186
// UXT s(tmp0), s(0)
141187
concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
142188
"0:",
@@ -150,6 +196,7 @@ macro_rules! compare_and_swap {
150196
"cbnz w17, 0b",
151197
"1:",
152198
"ret",
199+
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
153200
}
154201
}
155202
}
@@ -166,6 +213,11 @@ macro_rules! compare_and_swap_i128 {
166213
expected: i128, desired: i128, ptr: *mut i128
167214
) -> i128 {
168215
core::arch::naked_asm! {
216+
jmp_if_no_lse!("8f"),
217+
// CASP x0, x1, x2, x3, [x4]
218+
concat!(lse!("cas", $ordering, 16), " x0, x1, x2, x3, [x4]"),
219+
"ret",
220+
"8:",
169221
"mov x16, x0",
170222
"mov x17, x1",
171223
"0:",
@@ -179,6 +231,7 @@ macro_rules! compare_and_swap_i128 {
179231
"cbnz w15, 0b",
180232
"1:",
181233
"ret",
234+
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
182235
}
183236
}
184237
}
@@ -195,6 +248,11 @@ macro_rules! swap {
195248
left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
196249
) -> int_ty!($bytes) {
197250
core::arch::naked_asm! {
251+
jmp_if_no_lse!("8f"),
252+
// SWP s(0), s(0), [x1]
253+
concat!(lse!("swp", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
254+
"ret",
255+
"8:",
198256
// mov s(tmp0), s(0)
199257
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
200258
"0:",
@@ -204,6 +262,7 @@ macro_rules! swap {
204262
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
205263
"cbnz w17, 0b",
206264
"ret",
265+
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
207266
}
208267
}
209268
}
@@ -212,14 +271,19 @@ macro_rules! swap {
212271

213272
/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
214273
macro_rules! fetch_op {
215-
($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
274+
($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
216275
intrinsics! {
217276
#[maybe_use_optimized_c_shim]
218277
#[unsafe(naked)]
219278
pub unsafe extern "C" fn $name (
220279
val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
221280
) -> int_ty!($bytes) {
222281
core::arch::naked_asm! {
282+
jmp_if_no_lse!("8f"),
283+
// LSEOP s(0), s(0), [x1]
284+
concat!(lse!($lse_op, $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
285+
"ret",
286+
"8:",
223287
// mov s(tmp0), s(0)
224288
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
225289
"0:",
@@ -231,6 +295,7 @@ macro_rules! fetch_op {
231295
concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
232296
"cbnz w15, 0b",
233297
"ret",
298+
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
234299
}
235300
}
236301
}
@@ -240,25 +305,25 @@ macro_rules! fetch_op {
240305
// We need a single macro to pass to `foreach_ldadd`.
241306
macro_rules! add {
242307
($ordering:ident, $bytes:tt, $name:ident) => {
243-
fetch_op! { $ordering, $bytes, $name, "add" }
308+
fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
244309
};
245310
}
246311

247312
macro_rules! and {
248313
($ordering:ident, $bytes:tt, $name:ident) => {
249-
fetch_op! { $ordering, $bytes, $name, "bic" }
314+
fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
250315
};
251316
}
252317

253318
macro_rules! xor {
254319
($ordering:ident, $bytes:tt, $name:ident) => {
255-
fetch_op! { $ordering, $bytes, $name, "eor" }
320+
fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
256321
};
257322
}
258323

259324
macro_rules! or {
260325
($ordering:ident, $bytes:tt, $name:ident) => {
261-
fetch_op! { $ordering, $bytes, $name, "orr" }
326+
fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
262327
};
263328
}
264329

0 commit comments

Comments
 (0)