Skip to content

Commit a8f3686

Browse files
authored
Rollup merge of #144705 - pmur:murp/aarch64-lse, r=Amanieu
compiler-builtins: plumb LSE support for aarch64 on linux/gnu when optimized-compiler-builtins not enabled Add dynamic support for aarch64 LSE atomic ops on linux/gnu targets when optimized-compiler-builtins is not enabled. Enabling LSE is the primary motivator for #143689, though extending the rust version doesn't seem too farfetched. Are there more details which I have overlooked which make this impractical? I've tested this on an aarch64 host with LSE. r? ```````@tgross35```````
2 parents 2fd855f + 6936bb9 commit a8f3686

File tree

3 files changed

+93
-10
lines changed

3 files changed

+93
-10
lines changed

library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs

Lines changed: 66 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
//! which is supported on the current CPU.
77
//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
88
//!
9-
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
10-
//! Use the `compiler-rt` intrinsics if you want LSE support.
11-
//!
129
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
1310
//!
1411
//! Generate functions for each of the following symbols:
@@ -24,7 +21,18 @@
2421
//! We do something similar, but with macro arguments.
2522
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
2623

27-
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
24+
use core::sync::atomic::{AtomicU8, Ordering};
25+
26+
/// Non-zero if the host supports LSE atomics.
27+
static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);
28+
29+
intrinsics! {
30+
/// Call to enable LSE in outline atomic operations. The caller must verify
31+
/// LSE operations are supported.
32+
pub extern "C" fn __rust_enable_lse() {
33+
HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
34+
}
35+
}
2836

2937
/// Translate a byte size to a Rust type.
3038
#[rustfmt::skip]
@@ -45,6 +53,7 @@ macro_rules! reg {
4553
(2, $num:literal) => { concat!("w", $num) };
4654
(4, $num:literal) => { concat!("w", $num) };
4755
(8, $num:literal) => { concat!("x", $num) };
56+
(16, $num:literal) => { concat!("x", $num) };
4857
}
4958

5059
/// Given an atomic ordering, translate it to the acquire suffix for the ldxr aarch64 ASM instruction.
@@ -126,6 +135,41 @@ macro_rules! stxp {
126135
};
127136
}
128137

138+
// If supported, perform the requested LSE op and return, or fallthrough.
139+
macro_rules! try_lse_op {
140+
($op: literal, $ordering:ident, $bytes:tt, $($reg:literal,)* [ $mem:ident ] ) => {
141+
concat!(
142+
".arch_extension lse; ",
143+
"adrp x16, {have_lse}; ",
144+
"ldrb w16, [x16, :lo12:{have_lse}]; ",
145+
"cbz w16, 8f; ",
146+
// LSE_OP s(reg),* [$mem]
147+
concat!(lse!($op, $ordering, $bytes), $( " ", reg!($bytes, $reg), ", " ,)* "[", stringify!($mem), "]; ",),
148+
"ret; ",
149+
"8:"
150+
)
151+
};
152+
}
153+
154+
// Translate memory ordering to the LSE suffix
155+
#[rustfmt::skip]
156+
macro_rules! lse_mem_sfx {
157+
(Relaxed) => { "" };
158+
(Acquire) => { "a" };
159+
(Release) => { "l" };
160+
(AcqRel) => { "al" };
161+
}
162+
163+
// Generate the aarch64 LSE operation for memory ordering and width
164+
macro_rules! lse {
165+
($op:literal, $order:ident, 16) => {
166+
concat!($op, "p", lse_mem_sfx!($order))
167+
};
168+
($op:literal, $order:ident, $bytes:tt) => {
169+
concat!($op, lse_mem_sfx!($order), size!($bytes))
170+
};
171+
}
172+
129173
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
130174
macro_rules! compare_and_swap {
131175
($ordering:ident, $bytes:tt, $name:ident) => {
@@ -137,7 +181,9 @@ macro_rules! compare_and_swap {
137181
) -> int_ty!($bytes) {
138182
// We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
139183
core::arch::naked_asm! {
140-
// UXT s(tmp0), s(0)
184+
// CAS s(0), s(1), [x2]; if LSE supported.
185+
try_lse_op!("cas", $ordering, $bytes, 0, 1, [x2]),
186+
// UXT s(tmp0), s(0)
141187
concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
142188
"0:",
143189
// LDXR s(0), [x2]
@@ -150,6 +196,7 @@ macro_rules! compare_and_swap {
150196
"cbnz w17, 0b",
151197
"1:",
152198
"ret",
199+
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
153200
}
154201
}
155202
}
@@ -166,6 +213,8 @@ macro_rules! compare_and_swap_i128 {
166213
expected: i128, desired: i128, ptr: *mut i128
167214
) -> i128 {
168215
core::arch::naked_asm! {
216+
// CASP x0, x1, x2, x3, [x4]; if LSE supported.
217+
try_lse_op!("cas", $ordering, 16, 0, 1, 2, 3, [x4]),
169218
"mov x16, x0",
170219
"mov x17, x1",
171220
"0:",
@@ -179,6 +228,7 @@ macro_rules! compare_and_swap_i128 {
179228
"cbnz w15, 0b",
180229
"1:",
181230
"ret",
231+
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
182232
}
183233
}
184234
}
@@ -195,6 +245,8 @@ macro_rules! swap {
195245
left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
196246
) -> int_ty!($bytes) {
197247
core::arch::naked_asm! {
248+
// SWP s(0), s(0), [x1]; if LSE supported.
249+
try_lse_op!("swp", $ordering, $bytes, 0, 0, [x1]),
198250
// mov s(tmp0), s(0)
199251
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
200252
"0:",
@@ -204,6 +256,7 @@ macro_rules! swap {
204256
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
205257
"cbnz w17, 0b",
206258
"ret",
259+
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
207260
}
208261
}
209262
}
@@ -212,14 +265,16 @@ macro_rules! swap {
212265

213266
/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
214267
macro_rules! fetch_op {
215-
($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
268+
($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
216269
intrinsics! {
217270
#[maybe_use_optimized_c_shim]
218271
#[unsafe(naked)]
219272
pub unsafe extern "C" fn $name (
220273
val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
221274
) -> int_ty!($bytes) {
222275
core::arch::naked_asm! {
276+
// LSEOP s(0), s(0), [x1]; if LSE supported.
277+
try_lse_op!($lse_op, $ordering, $bytes, 0, 0, [x1]),
223278
// mov s(tmp0), s(0)
224279
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
225280
"0:",
@@ -231,6 +286,7 @@ macro_rules! fetch_op {
231286
concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
232287
"cbnz w15, 0b",
233288
"ret",
289+
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
234290
}
235291
}
236292
}
@@ -240,25 +296,25 @@ macro_rules! fetch_op {
240296
// We need a single macro to pass to `foreach_ldadd`.
241297
macro_rules! add {
242298
($ordering:ident, $bytes:tt, $name:ident) => {
243-
fetch_op! { $ordering, $bytes, $name, "add" }
299+
fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
244300
};
245301
}
246302

247303
macro_rules! and {
248304
($ordering:ident, $bytes:tt, $name:ident) => {
249-
fetch_op! { $ordering, $bytes, $name, "bic" }
305+
fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
250306
};
251307
}
252308

253309
macro_rules! xor {
254310
($ordering:ident, $bytes:tt, $name:ident) => {
255-
fetch_op! { $ordering, $bytes, $name, "eor" }
311+
fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
256312
};
257313
}
258314

259315
macro_rules! or {
260316
($ordering:ident, $bytes:tt, $name:ident) => {
261-
fetch_op! { $ordering, $bytes, $name, "orr" }
317+
fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
262318
};
263319
}
264320

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/// Hook into .init_array to enable LSE atomic operations at startup, if
2+
/// supported.
3+
#[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "compiler-builtins-c")))]
4+
#[used]
5+
#[unsafe(link_section = ".init_array.90")]
6+
static RUST_LSE_INIT: extern "C" fn() = {
7+
extern "C" fn init_lse() {
8+
use crate::arch;
9+
10+
// This is provided by compiler-builtins::aarch64_linux.
11+
unsafe extern "C" {
12+
fn __rust_enable_lse();
13+
}
14+
15+
if arch::is_aarch64_feature_detected!("lse") {
16+
unsafe {
17+
__rust_enable_lse();
18+
}
19+
}
20+
}
21+
init_lse
22+
};

library/std/src/sys/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
#![allow(unsafe_op_in_unsafe_fn)]
22

3+
/// The configure_builtins module provides runtime support for compiler-builtins features
4+
/// which require dynamic initialization to work as expected, e.g. aarch64
5+
/// outline-atomics.
6+
mod configure_builtins;
7+
38
/// The PAL (platform abstraction layer) contains platform-specific abstractions
49
/// for implementing the features in the other submodules, e.g. UNIX file
510
/// descriptors.

0 commit comments

Comments
 (0)