Skip to content

[experiment] enable outline-atomics on more aarch64 platforms #144938

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target {
.into(),
arch,
options: TargetOptions {
features: "+neon,+fp-armv8,+apple-a7".into(),
features: "+neon,+fp-armv8,+apple-a7,+outline-atomics".into(),
max_atomic_width: Some(128),
supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD,
..opts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target {
.into(),
arch,
options: TargetOptions {
features: "+neon,+fp-armv8,+apple-a7".into(),
features: "+neon,+fp-armv8,+apple-a7,+outline-atomics".into(),
max_atomic_width: Some(128),
..opts
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target {
.into(),
arch,
options: TargetOptions {
features: "+neon,+fp-armv8,+apple-a16".into(),
features: "+neon,+fp-armv8,+apple-a16,+outline-atomics".into(),
max_atomic_width: Some(128),
supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD,
..opts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target {
.into(),
arch,
options: TargetOptions {
features: "+v8a,+neon,+fp-armv8,+apple-a7".into(),
features: "+v8a,+neon,+fp-armv8,+apple-a7,+outline-atomics".into(),
max_atomic_width: Some(128),
dynamic_linking: false,
position_independent_executables: true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pub(crate) fn target() -> Target {
max_atomic_width: Some(128),
// As documented in https://developer.android.com/ndk/guides/cpu-features.html
// the neon (ASIMD) and FP must exist on all android aarch64 targets.
features: "+v8a,+neon,+fp-armv8".into(),
features: "+v8a,+neon,+fp-armv8,+outline-atomics".into(),
// the AAPCS64 expects use of non-leaf frame pointers per
// https://github.com/ARM-software/abi-aa/blob/4492d1570eb70c8fd146623e0db65b2d241f12e7/aapcs64/aapcs64.rst#the-frame-pointer
// and we tend to encounter interesting bugs in AArch64 unwinding code if we do not
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::spec::{FramePointer, Target, TargetMetadata, base};
pub(crate) fn target() -> Target {
let mut base = base::windows_gnullvm::opts();
base.max_atomic_width = Some(128);
base.features = "+v8a,+neon,+fp-armv8".into();
base.features = "+v8a,+neon,+fp-armv8,+outline-atomics".into();
base.linker = Some("aarch64-w64-mingw32-clang".into());

// Microsoft recommends enabling frame pointers on Arm64 Windows.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::spec::{FramePointer, Target, TargetMetadata, base};
pub(crate) fn target() -> Target {
let mut base = base::windows_msvc::opts();
base.max_atomic_width = Some(128);
base.features = "+v8a,+neon,+fp-armv8".into();
base.features = "+v8a,+neon,+fp-armv8,+outline-atomics".into();

// Microsoft recommends enabling frame pointers on Arm64 Windows.
// From https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#integer-registers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub(crate) fn target() -> Target {
data_layout: "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32".into(),
arch: "aarch64".into(),
options: TargetOptions {
features: "+v8a".into(),
features: "+v8a,+outline-atomics".into(),
max_atomic_width: Some(128),
stack_probes: StackProbeType::Inline,
supported_sanitizers: SanitizerSet::ADDRESS
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_target/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,10 @@ static AARCH64_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
("mte", Stable, &[]),
// FEAT_AdvSimd & FEAT_FP
("neon", Stable, &[]),
// Backend option to turn atomic operations into an intrinsic call when `lse` is not known to be
// available, so the intrinsic can do runtime LSE feature detection rather than unconditionally
// using slower non-LSE operations. Unstable since it doesn't need to be user-togglable.
("outline-atomics", Unstable(sym::aarch64_unstable_target_feature), &[]),
// FEAT_PAUTH (address authentication)
("paca", Stable, &[]),
// FEAT_PAUTH (generic authentication)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
//! which is supported on the current CPU.
//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
//!
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
//! Use the `compiler-rt` intrinsics if you want LSE support.
//!
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
//!
//! Generate functions for each of the following symbols:
Expand All @@ -24,7 +21,18 @@
//! We do something similar, but with macro arguments.
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule

// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
use core::sync::atomic::{AtomicU8, Ordering};

/// Non-zero if the host supports LSE atomics.
///
/// Starts at 0 and is set to 1 by `__rust_enable_lse`. The outline-atomic assembly routines
/// in this module receive it as their `have_lse` symbol operand.
static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);

intrinsics! {
/// Call to enable LSE in outline atomic operations. The caller must verify
/// LSE operations are supported.
///
/// Stores 1 into `HAVE_LSE_ATOMICS`, which is initialised to 0. Nothing in this
/// function resets the flag.
pub extern "C" fn __rust_enable_lse() {
// Relaxed: this store orders nothing else, it only publishes the flag value.
HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
}
}

/// Translate a byte size to a Rust type.
#[rustfmt::skip]
Expand Down Expand Up @@ -126,6 +134,39 @@ macro_rules! stxp {
};
}

// Emit the runtime LSE check used at the start of the outline-atomic routines: load the
// `have_lse` flag byte into w16 and branch to `$target` when it is zero.
//
// The expansion contains a `{have_lse}` placeholder, so the enclosing asm block must supply an
// operand with that name.
//
// NOTE(review): `:lo12:` is ELF-style relocation syntax — confirm it assembles on every object
// format this module gets built for.
macro_rules! jmp_if_no_lse {
    ($target:literal) => {
        concat!(
            // Let the assembler accept LSE mnemonics from here on.
            ".arch_extension lse; ",
            // x16 = page of the flag, then w16 = the flag byte itself.
            "adrp x16, {have_lse}; ldrb w16, [x16, :lo12:{have_lse}]; ",
            // Zero flag: skip the LSE instruction and go to the caller-chosen label.
            "cbz w16, ", $target, ";"
        )
    };
}

// Map a memory ordering to the suffix appended to an LSE mnemonic:
// nothing for `Relaxed`, `a` for `Acquire`, `l` for `Release`, `al` for `AcqRel`.
#[rustfmt::skip]
macro_rules! lse_mem_sfx {
    (AcqRel)  => ("al");
    (Release) => ("l");
    (Acquire) => ("a");
    (Relaxed) => ("");
}

// Build the LSE instruction mnemonic for a base operation, memory ordering and operand width.
//
// A width of 16 selects the pair form: base mnemonic, then `p`, then the ordering suffix.
// Every other width yields base mnemonic, ordering suffix, then whatever `size!` gives for
// that width.
macro_rules! lse {
    ($mnemonic:literal, $ord:ident, 16) => {
        concat!($mnemonic, "p", lse_mem_sfx!($ord))
    };
    ($mnemonic:literal, $ord:ident, $width:tt) => {
        concat!($mnemonic, lse_mem_sfx!($ord), size!($width))
    };
}

/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
macro_rules! compare_and_swap {
($ordering:ident, $bytes:tt, $name:ident) => {
Expand All @@ -137,6 +178,11 @@ macro_rules! compare_and_swap {
) -> int_ty!($bytes) {
// We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
core::arch::naked_asm! {
jmp_if_no_lse!("8f"),
// CAS s(0), s(1), [x2]
concat!(lse!("cas", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 1), ", [x2]"),
"ret",
"8:",
// UXT s(tmp0), s(0)
concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
"0:",
Expand All @@ -150,6 +196,7 @@ macro_rules! compare_and_swap {
"cbnz w17, 0b",
"1:",
"ret",
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
}
}
}
Expand All @@ -166,6 +213,11 @@ macro_rules! compare_and_swap_i128 {
expected: i128, desired: i128, ptr: *mut i128
) -> i128 {
core::arch::naked_asm! {
jmp_if_no_lse!("8f"),
// CASP x0, x1, x2, x3, [x4]
concat!(lse!("cas", $ordering, 16), " x0, x1, x2, x3, [x4]"),
"ret",
"8:",
"mov x16, x0",
"mov x17, x1",
"0:",
Expand All @@ -179,6 +231,7 @@ macro_rules! compare_and_swap_i128 {
"cbnz w15, 0b",
"1:",
"ret",
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
}
}
}
Expand All @@ -195,6 +248,11 @@ macro_rules! swap {
left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
) -> int_ty!($bytes) {
core::arch::naked_asm! {
jmp_if_no_lse!("8f"),
// SWP s(0), s(0), [x1]
concat!(lse!("swp", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
"ret",
"8:",
// mov s(tmp0), s(0)
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
"0:",
Expand All @@ -204,6 +262,7 @@ macro_rules! swap {
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
"cbnz w17, 0b",
"ret",
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
}
}
}
Expand All @@ -212,14 +271,19 @@ macro_rules! swap {

/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
macro_rules! fetch_op {
($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
intrinsics! {
#[maybe_use_optimized_c_shim]
#[unsafe(naked)]
pub unsafe extern "C" fn $name (
val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
) -> int_ty!($bytes) {
core::arch::naked_asm! {
jmp_if_no_lse!("8f"),
// LSEOP s(0), s(0), [x1]
concat!(lse!($lse_op, $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"),
"ret",
"8:",
// mov s(tmp0), s(0)
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
"0:",
Expand All @@ -231,6 +295,7 @@ macro_rules! fetch_op {
concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
"cbnz w15, 0b",
"ret",
have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS,
}
}
}
Expand All @@ -240,25 +305,25 @@ macro_rules! fetch_op {
// We need a single macro to pass to `foreach_ldadd`.
macro_rules! add {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "add" }
fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
};
}

macro_rules! and {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "bic" }
fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
};
}

macro_rules! xor {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "eor" }
fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
};
}

macro_rules! or {
($ordering:ident, $bytes:tt, $name:ident) => {
fetch_op! { $ordering, $bytes, $name, "orr" }
fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
};
}

Expand Down
4 changes: 2 additions & 2 deletions library/compiler-builtins/compiler-builtins/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ pub mod arm;
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
pub mod aarch64;

#[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm"),))]
pub mod aarch64_linux;
#[cfg(all(target_arch = "aarch64", target_feature = "outline-atomics"))]
pub mod aarch64_outline_atomics;

#[cfg(all(
kernel_user_helpers,
Expand Down
27 changes: 27 additions & 0 deletions library/std/src/sys/configure_builtins.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/// Startup hook, placed in `.init_array`, that switches the outline-atomic
/// routines over to LSE instructions when the running CPU supports them.
#[cfg(all(
    target_arch = "aarch64",
    target_os = "linux",
    any(target_env = "gnu", target_env = "musl"),
    not(feature = "compiler-builtins-c")
))]
#[used]
#[unsafe(link_section = ".init_array.90")]
static RUST_LSE_INIT: extern "C" fn() = {
    // Provided by `compiler_builtins::aarch64_outline_atomics`.
    unsafe extern "C" {
        fn __rust_enable_lse();
    }

    extern "C" fn init_lse() {
        if crate::arch::is_aarch64_feature_detected!("lse") {
            // SAFETY: `__rust_enable_lse` requires the caller to have verified that LSE
            // operations are supported, which the feature check above has just done.
            unsafe { __rust_enable_lse() }
        }
    }

    init_lse
};
5 changes: 5 additions & 0 deletions library/std/src/sys/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#![allow(unsafe_op_in_unsafe_fn)]

/// The configure builtins module provides runtime support for compiler-builtins
/// features which require dynamic initialization to work as expected, e.g. aarch64
/// outline-atomics.
mod configure_builtins;

/// The PAL (platform abstraction layer) contains platform-specific abstractions
/// for implementing the features in the other submodules, e.g. UNIX file
/// descriptors.
Expand Down
4 changes: 4 additions & 0 deletions tests/assembly-llvm/asm/aarch64-outline-atomics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
use std::sync::atomic::AtomicI32;
use std::sync::atomic::Ordering::*;

// Verify config on outline-atomics works (it is always enabled on aarch64-linux).
#[cfg(not(target_feature = "outline-atomics"))]
compile_error!("outline-atomics is not enabled");

pub fn compare_exchange(a: &AtomicI32) {
// On AArch64 LLVM should outline atomic operations.
// CHECK: __aarch64_cas4_relax
Expand Down
1 change: 1 addition & 0 deletions tests/ui/check-cfg/target_feature.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`nnp-assist`
`nontrapping-fptoint`
`nvic`
`outline-atomics`
`paca`
`pacg`
`pan`
Expand Down
Loading