diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 6f8fba2a30dc3..0279cad574129 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -861,7 +861,7 @@ pub(crate) fn codegen(
                         "LLVM_module_codegen_make_bitcode",
                         &*module.name,
                     );
-                    ThinBuffer::new(llmod, config.emit_thin_lto, false)
+                    ThinBuffer::new(llmod, cgcx.lto != Lto::Fat && config.emit_thin_lto, false)
                 };
                 let data = thin.data();
                 let _timer = cgcx
@@ -949,7 +949,7 @@ pub(crate) fn codegen(
             // binaries. So we must clone the module to produce the asm output
             // if we are also producing object code.
             let llmod = if let EmitObj::ObjectCode(_) = config.emit_obj {
-                llvm::LLVMCloneModule(llmod)
+                unsafe { llvm::LLVMCloneModule(llmod) }
             } else {
                 llmod
             };
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 0ade9edb0d2ea..67e7951124879 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -3,7 +3,9 @@ use std::ops::Deref;
 use std::{iter, ptr};
 
 pub(crate) mod autodiff;
+pub(crate) mod gpu_device;
 pub(crate) mod gpu_offload;
+pub(crate) mod gpu_wrapper;
 
 use libc::{c_char, c_uint, size_t};
 use rustc_abi as abi;
diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_device.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_device.rs
new file mode 100644
index 0000000000000..cb957872fec05
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/builder/gpu_device.rs
@@ -0,0 +1,120 @@
+use std::ffi::{CString, c_uint};
+
+use llvm::Linkage::*;
+use rustc_codegen_ssa::back::write::CodegenContext;
+
+use crate::llvm::{self, Linkage};
+use crate::{LlvmCodegenBackend, SimpleCx};
+
+/// Same as [`add_global_in_addrspace`], but additionally sets the unnamed-address kind of the
+/// new global to `UnnamedAddr::Global`.
+fn add_unnamed_global_in_addrspace<'ll>(
+    cx: &SimpleCx<'ll>,
+    name: &str,
+    initializer: &'ll llvm::Value,
+    l: Linkage,
+    addrspace: u32,
+) -> &'ll llvm::Value {
+    let llglobal = add_global_in_addrspace(cx, name, initializer, l, addrspace);
+    llvm::LLVMSetUnnamedAddress(llglobal, llvm::UnnamedAddr::Global);
+    llglobal
+}
+
+/// Adds a global named `name` to `cx.llmod` in address space `addrspace`.
+///
+/// The global takes the type of `initializer`, is marked constant, gets linkage `l`, and is
+/// initialized with `initializer`.
+///
+/// # Panics
+///
+/// Panics if `name` contains an interior NUL byte.
+pub(crate) fn add_global_in_addrspace<'ll>(
+    cx: &SimpleCx<'ll>,
+    name: &str,
+    initializer: &'ll llvm::Value,
+    l: Linkage,
+    addrspace: u32,
+) -> &'ll llvm::Value {
+    let c_name = CString::new(name).unwrap();
+    let llglobal: &'ll llvm::Value = llvm::add_global_in_addrspace(
+        cx.llmod,
+        cx.val_ty(initializer),
+        &c_name,
+        addrspace as c_uint,
+    );
+    llvm::set_global_constant(llglobal, true);
+    llvm::set_linkage(llglobal, l);
+    llvm::set_initializer(llglobal, initializer);
+    llglobal
+}
+
+/// Builds a scratch module containing the OpenMP device-runtime configuration globals, prints
+/// it to `rustmagic-openmp-amdgcn-amd-amdhsa-gfx90a.ll`, and disposes of the module and context.
+///
+/// The globals mirror the reference IR quoted at the bottom of this file.
+// NOTE(review): `allow(unused)` suggests nothing calls this yet; drop the attribute once it is
+// wired up.
+#[allow(unused)]
+pub(crate) fn gen_asdf<'ll>(cgcx: &CodegenContext<LlvmCodegenBackend>, _old_cx: &SimpleCx<'ll>) {
+    let llcx = unsafe { llvm::LLVMRustContextCreate(false) };
+    let module_name = CString::new("offload.wrapper.module").unwrap();
+    let llmod = unsafe { llvm::LLVMModuleCreateWithNameInContext(module_name.as_ptr(), llcx) };
+    let cx = SimpleCx::new(llmod, llcx, cgcx.pointer_size);
+
+    // All `__omp_rtl_*` flags are `i32 0` constants in address space 1.
+    let zero = cx.get_const_i32(0);
+    for name in [
+        "__omp_rtl_debug_kind",
+        "__omp_rtl_assume_teams_oversubscription",
+        "__omp_rtl_assume_threads_oversubscription",
+        "__omp_rtl_assume_no_thread_state",
+        "__omp_rtl_assume_no_nested_parallelism",
+    ] {
+        add_unnamed_global_in_addrspace(&cx, name, zero, WeakODRLinkage, 1);
+    }
+    add_unnamed_global_in_addrspace(
+        &cx,
+        "__oclc_ABI_version",
+        cx.get_const_i32(500),
+        WeakODRLinkage,
+        4,
+    );
+
+    let out_path = CString::new("rustmagic-openmp-amdgcn-amd-amdhsa-gfx90a.ll").unwrap();
+    // SAFETY: `llmod` and `llcx` were created above and are not touched again once disposed;
+    // `out_path` is a NUL-terminated string that outlives the call.
+    unsafe {
+        // NOTE(review): in the LLVM-C API the last argument is an out-pointer through which an
+        // error message is returned on failure; confirm that passing null is acceptable here.
+        llvm::LLVMPrintModuleToFile(llmod, out_path.as_ptr(), std::ptr::null_mut());
+
+        // Clean up
+        llvm::LLVMDisposeModule(llmod);
+        llvm::LLVMContextDispose(llcx);
+    }
+    // TODO: address space 1 or 4
+}
+// source_filename = "mem.cpp"
+// GPU: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// CPU: target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +// target triple = "amdgcn-amd-amdhsa" +// +// @__omp_rtl_debug_kind = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0 +// @__omp_rtl_assume_teams_oversubscription = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0 +// @__omp_rtl_assume_threads_oversubscription = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0 +// @__omp_rtl_assume_no_thread_state = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0 +// @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0 +// @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 +// +// !llvm.module.flags = !{!0, !1, !2, !3, !4} +// !opencl.ocl.version = !{!5} +// !llvm.ident = !{!6, !7} +// +// !0 = !{i32 1, !"amdhsa_code_object_version", i32 500} +// !1 = !{i32 1, !"wchar_size", i32 4} +// !2 = !{i32 7, !"openmp", i32 51} +// !3 = !{i32 7, !"openmp-device", i32 51} +// !4 = !{i32 8, !"PIC Level", i32 2} +// !5 = !{i32 2, i32 0} +// !6 = !{!"clang version 20.1.5-rust-1.89.0-nightly (https://github.com/rust-lang/llvm-project.git c1118fdbb3024157df7f4cfe765f2b0b4339e8a2)"} +// !7 = !{!"AMD clang version 19.0.0git (https://github.com/RadeonOpenCompute/llvm-project roc-6.4.0 25133 c7fe45cf4b819c5991fe208aaa96edf142730f1d)"} diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs index 1280ab1442a09..b87806b8807b3 100644 --- a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs +++ b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs @@ -12,7 +12,7 @@ use crate::llvm::{self, Linkage, 
Type, Value};
 use crate::{LlvmCodegenBackend, SimpleCx, attributes};
 
 pub(crate) fn handle_gpu_code<'ll>(
-    _cgcx: &CodegenContext<LlvmCodegenBackend>,
+    cgcx: &CodegenContext<LlvmCodegenBackend>,
     cx: &'ll SimpleCx<'_>,
 ) {
     // The offload memory transfer type for each kernel
@@ -26,8 +26,8 @@ pub(crate) fn handle_gpu_code<'ll>(
             kernels.push(kernel);
         }
     }
-
     gen_call_handling(&cx, &kernels, &o_types);
+    crate::builder::gpu_wrapper::gen_image_wrapper_module(cgcx);
 }
 
 // What is our @1 here? A magic global, used in our data_{begin/update/end}_mapper:
@@ -248,7 +248,7 @@ fn gen_define_handling<'ll>(
     o_types
 }
 
-fn declare_offload_fn<'ll>(
+pub(crate) fn declare_offload_fn<'ll>(
     cx: &'ll SimpleCx<'_>,
     name: &str,
     ty: &'ll llvm::Type,
diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_wrapper.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_wrapper.rs
new file mode 100644
index 0000000000000..f7cf6c906392f
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/builder/gpu_wrapper.rs
@@ -0,0 +1,131 @@
+use std::ffi::{CStr, CString, c_char};
+
+use llvm::Linkage::*;
+use rustc_abi::{AddressSpace, Align};
+use rustc_codegen_ssa::back::write::CodegenContext;
+use rustc_codegen_ssa::traits::BaseTypeCodegenMethods;
+
+use crate::builder::gpu_offload::*;
+use crate::llvm::{self, Linkage, Type, Visibility};
+use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx};
+
+/// Creates a named struct type called `name` in `cx.llcx` whose fields are `tys`.
+///
+/// # Panics
+///
+/// Panics if `name` contains an interior NUL byte.
+pub(crate) fn create_struct_ty<'ll>(
+    cx: &'ll SimpleCx<'_>,
+    name: &str,
+    tys: &[&'ll llvm::Type],
+) -> &'ll llvm::Type {
+    let entry_struct_name = CString::new(name).unwrap();
+    // SAFETY: `entry_struct_name` is NUL-terminated and `tys` provides `tys.len()` valid types.
+    unsafe {
+        let entry_struct = llvm::LLVMStructCreateNamed(cx.llcx, entry_struct_name.as_ptr());
+        llvm::LLVMStructSetBody(entry_struct, tys.as_ptr(), tys.len() as u32, 0);
+        entry_struct
+    }
+}
+
+/// Adds a global named `name` of type `ty` to `cx.llmod` without giving it an initializer.
+///
+/// The global is marked constant, gets linkage `l`, and hidden visibility if `hidden` is set.
+///
+/// # Panics
+///
+/// Panics if `name` contains an interior NUL byte.
+pub(crate) fn add_global_decl<'ll>(
+    cx: &SimpleCx<'ll>,
+    ty: &'ll Type,
+    name: &str,
+    l: Linkage,
+    hidden: bool,
+) -> &'ll llvm::Value {
+    let c_name = CString::new(name).unwrap();
+    let llglobal: &'ll llvm::Value = llvm::add_global(cx.llmod, ty, &c_name);
+    llvm::set_global_constant(llglobal, true);
+    llvm::set_linkage(llglobal, l);
+    if hidden {
+        llvm::set_visibility(llglobal, Visibility::Hidden);
+    }
+    llglobal
+}
+
+/// Builds the offload image wrapper module in a fresh LLVM context and prints it to
+/// `rustmagic.openmp.image.wrapper.ll`.
+///
+/// # Panics
+///
+/// Panics if the module cannot be created or if writing the `.ll` file fails.
+// We don't copy types from other functions because we generate a new module and context.
+// Bringing in types from other contexts would likely cause issues.
+pub(crate) fn gen_image_wrapper_module(cgcx: &CodegenContext<LlvmCodegenBackend>) {
+    // FIXME(offload): the data layout and the triple are hard-coded.
+    let dl_cstr = CString::new("e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9").unwrap();
+    let target_cstr = CString::new("amdgcn-amd-amdhsa").unwrap();
+    let name = "offload.wrapper.module";
+    // Lend the strings out with `as_ptr` instead of `into_raw`, which would leak both
+    // allocations; the `CString`s are declared before `m` and therefore outlive it.
+    let m: crate::ModuleLlvm =
+        ModuleLlvm::new_simple(name, dl_cstr.as_ptr(), target_cstr.as_ptr(), cgcx).unwrap();
+    let cx = SimpleCx::new(m.llmod(), m.llcx, cgcx.pointer_size);
+    let tptr = cx.type_ptr();
+    let tptr1 = cx.type_ptr_ext(AddressSpace(1));
+    let ti64 = cx.type_i64();
+    let ti32 = cx.type_i32();
+    let ti16 = cx.type_i16();
+
+    let entry_fields = [ti64, ti16, ti16, ti32, tptr, tptr, ti64, ti64, tptr];
+    create_struct_ty(&cx, "__tgt_offload_entry", &entry_fields);
+    create_struct_ty(&cx, "__tgt_device_image", &[tptr, tptr, tptr, tptr]);
+    create_struct_ty(&cx, "__tgt_bin_desc", &[ti32, tptr, tptr, tptr]);
+
+    let offload_entry_ty = add_tgt_offload_entry(&cx);
+    let offload_entry_arr = cx.type_array(offload_entry_ty, 0);
+
+    for name in ["__start_omp_offloading_entries", "__stop_omp_offloading_entries"] {
+        add_global_decl(&cx, offload_entry_arr, name, ExternalLinkage, true);
+    }
+
+    let name = "__dummy.omp_offloading_entries";
+    let dummy_entries = add_global_decl(&cx, offload_entry_arr, name, InternalLinkage, false);
+    let entries_section = CString::new("omp_offloading_entries").unwrap();
+    llvm::set_section(dummy_entries, &entries_section);
+    llvm::set_initializer(dummy_entries, cx.const_null(offload_entry_arr));
+
+    // @llvm.compiler.used = appending global [1 x ptr] [ptr @__dummy.omp_offloading_entries],
+    // section "llvm.metadata"
+    let arr_val = cx.const_array(tptr1, &[dummy_entries]);
+    let compiler_used = add_global(&cx, "llvm.compiler.used", arr_val, AppendingLinkage);
+    let metadata_section = CString::new("llvm.metadata").unwrap();
+    llvm::set_section(compiler_used, &metadata_section);
+    llvm::set_global_constant(compiler_used, false);
+
+    let mapper_fn_ty = cx.type_func(&[tptr], cx.type_void());
+    declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty);
+    declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty);
+    declare_offload_fn(&cx, "atexit", cx.type_func(&[tptr], ti32));
+
+    // NOTE(review): placeholder payload — presumably the real device image goes here later.
+    let unknown_txt = "11111111111111";
+    let c_entry_name = CString::new(unknown_txt).unwrap();
+    let c_val = c_entry_name.as_bytes_with_nul();
+    let initializer = crate::common::bytes_in_context(cx.llcx, c_val);
+    let device_image =
+        add_unnamed_global(&cx, ".omp_offloading.device_image", initializer, InternalLinkage);
+    let image_section = CString::new(".llvm.offloading").unwrap();
+    llvm::set_section(device_image, &image_section);
+    llvm::set_alignment(device_image, Align::EIGHT);
+
+    let dump_path = CString::new("rustmagic.openmp.image.wrapper.ll").unwrap();
+    let mut err_msg: *mut c_char = std::ptr::null_mut();
+    // SAFETY: `cx.llmod` is the live module owned by `m`, `dump_path` is NUL-terminated, and
+    // `err_msg` is a valid out-pointer for the duration of the call.
+    unsafe { llvm::LLVMPrintModuleToFile(cx.llmod, dump_path.as_ptr(), &mut err_msg) };
+    if !err_msg.is_null() {
+        // SAFETY: on failure LLVM stores a pointer to a NUL-terminated message in `err_msg`.
+        let msg = unsafe { CStr::from_ptr(err_msg) }.to_string_lossy();
+        panic!("failed to write `rustmagic.openmp.image.wrapper.ll`: {msg}");
+    }
+}
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index ee77774c68832..2563d2e18e99d 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -159,6 +159,31 @@ fn to_llvm_tls_model(tls_model: TlsModel) -> llvm::ThreadLocalMode {
     }
 }
 
+// FIXME(offload): This method is not relying on a tcx. We might still want to try to share some of
+// the logic with create_module, e.g. the target_data_layout handling.
+/// Creates an empty module called `mod_name` in `llcx` and sets its data layout and target
+/// triple.
+///
+/// # Safety
+///
+/// `target_data_layout` and `target_triple` must point to valid NUL-terminated C strings.
+pub(crate) unsafe fn create_simple_module<'ll>(
+    llcx: &'ll llvm::Context,
+    target_data_layout: *const std::ffi::c_char,
+    target_triple: *const std::ffi::c_char,
+    mod_name: &str,
+) -> &'ll llvm::Module {
+    let mod_name = SmallCStr::new(mod_name);
+    // SAFETY: `mod_name` is NUL-terminated and alive for the call; the caller vouches for the
+    // two raw string pointers.
+    unsafe {
+        let llmod = llvm::LLVMModuleCreateWithNameInContext(mod_name.as_ptr(), llcx);
+        llvm::LLVMSetDataLayout(llmod, target_data_layout);
+        llvm::LLVMSetTarget(llmod, target_triple);
+        llmod
+    }
+}
+
 pub(crate) unsafe fn create_module<'ll>(
     tcx: TyCtxt<'_>,
     llcx: &'ll llvm::Context,
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index aaf21f9ada9a5..a077f63749136 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -388,6 +388,29 @@ unsafe impl Send for ModuleLlvm {}
 unsafe impl Sync for ModuleLlvm {}
 
 impl ModuleLlvm {
+    /// Creates a fresh LLVM context containing an empty module called `name` with the given
+    /// data layout and target triple, together with a target machine built from `cgcx`.
+    ///
+    /// `dl_cstr` and `target_cstr` must point to valid NUL-terminated C strings.
+    // NOTE(review): if creating the target machine fails, the `?` below returns without
+    // disposing of the context created above.
+    fn new_simple(
+        name: &str,
+        dl_cstr: *const std::ffi::c_char,
+        target_cstr: *const std::ffi::c_char,
+        cgcx: &CodegenContext<LlvmCodegenBackend>,
+    ) -> Result<Self, FatalError> {
+        // SAFETY: `llcx` is a freshly created context; the caller vouches for the two raw
+        // string pointers (see the doc comment).
+        unsafe {
+            let llcx = llvm::LLVMRustContextCreate(false);
+            let llmod_raw = context::create_simple_module(llcx, dl_cstr, target_cstr, name);
+            let dcx = cgcx.create_dcx();
+            let tm = ModuleLlvm::tm_from_cgcx(cgcx, name, dcx.handle())?;
+            Ok(ModuleLlvm { llmod_raw, llcx, tm: ManuallyDrop::new(tm) })
+        }
+    }
+
     fn new(tcx: TyCtxt<'_>, mod_name: &str) -> Self {
         unsafe {
             let llcx = llvm::LLVMRustContextCreate(tcx.sess.fewer_names());
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index edfb29dd1be72..755cb0fe5501f 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1009,12 +1009,24 @@ unsafe extern "C" {
         ModuleID: *const c_char,
         C: &Context,
     ) -> &Module;
-    pub(crate) safe fn LLVMCloneModule(M: &Module) -> &Module;
+    /// Prints the IR of `M` to the file `Filename`. On failure LLVM stores a message through
+    /// `ErrorMessage`, so that pointer must not be null.
+    // NOTE(review): the C function also returns an `LLVMBool` failure flag, which this
+    // declaration does not expose.
+    pub(crate) fn LLVMPrintModuleToFile(
+        M: &Module,
+        Filename: *const c_char,
+        ErrorMessage: *mut *mut c_char,
+    );
+    pub(crate) fn LLVMCloneModule(M: &Module) -> &Module;
+    pub(crate) fn LLVMDisposeModule(M: &Module);
 
     /// Data layout. See Module::getDataLayout.
     pub(crate) fn LLVMGetDataLayoutStr(M: &Module) -> *const c_char;
     pub(crate) fn LLVMSetDataLayout(M: &Module, Triple: *const c_char);
 
+    pub(crate) fn LLVMSetTarget(M: &Module, Name: *const c_char);
+
     /// Append inline assembly to a module. See `Module::appendModuleInlineAsm`.
     pub(crate) fn LLVMAppendModuleInlineAsm(
         M: &Module,
@@ -1175,6 +1187,12 @@ unsafe extern "C" {
     // Operations on global variables
     pub(crate) safe fn LLVMIsAGlobalVariable(GlobalVar: &Value) -> Option<&Value>;
     pub(crate) fn LLVMAddGlobal<'a>(M: &'a Module, Ty: &'a Type, Name: *const c_char) -> &'a Value;
+    pub(crate) fn LLVMAddGlobalInAddressSpace<'a>(
+        M: &'a Module,
+        Ty: &'a Type,
+        Name: *const c_char,
+        addrspace: c_uint,
+    ) -> &'a Value;
     pub(crate) fn LLVMGetNamedGlobal(M: &Module, Name: *const c_char) -> Option<&Value>;
     pub(crate) fn LLVMGetFirstGlobal(M: &Module) -> Option<&Value>;
     pub(crate) fn LLVMGetNextGlobal(GlobalVar: &Value) -> Option<&Value>;
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index 154ba4fd69018..a9035ef231f2d 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -251,6 +251,16 @@ pub(crate) fn add_global<'a>(llmod: &'a Module, ty: &'a Type, name_cstr: &CStr)
     unsafe { LLVMAddGlobal(llmod, ty, name_cstr.as_ptr()) }
 }
 
+/// Like [`add_global`], but creates the global in the address space `addrspace`.
+pub(crate) fn add_global_in_addrspace<'a>(
+    llmod: &'a Module,
+    ty: &'a Type,
+    name_cstr: &CStr,
+    addrspace: c_uint,
+) -> &'a Value {
+    unsafe { LLVMAddGlobalInAddressSpace(llmod, ty, name_cstr.as_ptr(), addrspace) }
+}
+
 pub(crate) fn set_initializer(llglobal: &Value, constant_val: &Value) {
     unsafe {
         LLVMSetInitializer(llglobal, constant_val);
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 0fb987bdf82ed..3bcd417941e96 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ 
b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -405,6 +405,8 @@ fn update_target_reliable_float_cfg(sess: &Session, cfg: &mut TargetConfig) {
         ("mips64" | "mips64r6", _) => false,
         // Selection bug
         ("nvptx64", _) => false,
+        // Unsupported https://github.com/llvm/llvm-project/issues/121122
+        ("amdgpu", _) => false,
         // ABI bugs et al. (full
         // list at )
         ("powerpc" | "powerpc64", _) => false,
diff --git a/src/tools/run-make-support/src/external_deps/llvm.rs b/src/tools/run-make-support/src/external_deps/llvm.rs
index 9a6e35da3fe20..939160d9f41d8 100644
--- a/src/tools/run-make-support/src/external_deps/llvm.rs
+++ b/src/tools/run-make-support/src/external_deps/llvm.rs
@@ -60,6 +60,12 @@ pub fn llvm_pdbutil() -> LlvmPdbutil {
     LlvmPdbutil::new()
 }
 
+/// Construct a new `llvm-as` invocation. This assumes that `llvm-as` is available
+/// at `$LLVM_BIN_DIR/llvm-as`.
+pub fn llvm_as() -> LlvmAs {
+    LlvmAs::new()
+}
+
 /// Construct a new `llvm-dis` invocation. This assumes that `llvm-dis` is available
 /// at `$LLVM_BIN_DIR/llvm-dis`.
 pub fn llvm_dis() -> LlvmDis {
@@ -135,6 +141,13 @@ pub struct LlvmPdbutil {
     cmd: Command,
 }
 
+/// An `llvm-as` invocation builder.
+#[derive(Debug)]
+#[must_use]
+pub struct LlvmAs {
+    cmd: Command,
+}
+
 /// A `llvm-dis` invocation builder.
 #[derive(Debug)]
 #[must_use]
@@ -158,6 +171,7 @@ crate::macros::impl_common_helpers!(LlvmNm);
 crate::macros::impl_common_helpers!(LlvmBcanalyzer);
 crate::macros::impl_common_helpers!(LlvmDwarfdump);
 crate::macros::impl_common_helpers!(LlvmPdbutil);
+crate::macros::impl_common_helpers!(LlvmAs);
 crate::macros::impl_common_helpers!(LlvmDis);
 crate::macros::impl_common_helpers!(LlvmObjcopy);
 
@@ -441,6 +455,22 @@ impl LlvmObjcopy {
     }
 }
 
+impl LlvmAs {
+    /// Construct a new `llvm-as` invocation. This assumes that `llvm-as` is available
+    /// at `$LLVM_BIN_DIR/llvm-as`.
+    pub fn new() -> Self {
+        let llvm_as = llvm_bin_dir().join("llvm-as");
+        let cmd = Command::new(llvm_as);
+        Self { cmd }
+    }
+
+    /// Provide an input file; the path is passed to `llvm-as` as a positional argument.
+    pub fn input<P: AsRef<Path>>(&mut self, path: P) -> &mut Self {
+        self.cmd.arg(path.as_ref());
+        self
+    }
+}
+
 impl LlvmDis {
     /// Construct a new `llvm-dis` invocation. This assumes that `llvm-dis` is available
     /// at `$LLVM_BIN_DIR/llvm-dis`.
diff --git a/src/tools/run-make-support/src/external_deps/rustc.rs b/src/tools/run-make-support/src/external_deps/rustc.rs
index 08ba1388dc148..60d3366ee98c8 100644
--- a/src/tools/run-make-support/src/external_deps/rustc.rs
+++ b/src/tools/run-make-support/src/external_deps/rustc.rs
@@ -173,6 +173,12 @@ impl Rustc {
         self
     }
 
+    /// This flag enables LTO in the specified form (passed as `-Clto=<option>`).
+    pub fn lto(&mut self, option: &str) -> &mut Self {
+        self.cmd.arg(format!("-Clto={option}"));
+        self
+    }
+
     /// This flag defers LTO optimizations to the linker.
     pub fn linker_plugin_lto(&mut self, option: &str) -> &mut Self {
         self.cmd.arg(format!("-Clinker-plugin-lto={option}"));
diff --git a/src/tools/run-make-support/src/lib.rs b/src/tools/run-make-support/src/lib.rs
index 29cd6c4ad1591..d6d2551ac59eb 100644
--- a/src/tools/run-make-support/src/lib.rs
+++ b/src/tools/run-make-support/src/lib.rs
@@ -61,6 +61,7 @@ pub use crate::external_deps::c_cxx_compiler::{
 pub use crate::external_deps::cargo::cargo;
 pub use crate::external_deps::clang::{Clang, clang};
 pub use crate::external_deps::htmldocck::htmldocck;
+pub use crate::external_deps::llvm::{LlvmAs, llvm_as};
 pub use crate::external_deps::llvm::{
     self, LlvmAr, LlvmBcanalyzer, LlvmDis, LlvmDwarfdump, LlvmFilecheck, LlvmNm, LlvmObjcopy,
     LlvmObjdump, LlvmProfdata, LlvmReadobj, llvm_ar, llvm_bcanalyzer, llvm_dis, 
llvm_dwarfdump, diff --git a/tests/run-make/cross-lang-lto-clang/rmake.rs b/tests/run-make/cross-lang-lto-clang/rmake.rs index 3fed6ea20667a..0c4383e2cd815 100644 --- a/tests/run-make/cross-lang-lto-clang/rmake.rs +++ b/tests/run-make/cross-lang-lto-clang/rmake.rs @@ -28,7 +28,16 @@ static C_NEVER_INLINED_PATTERN: &'static str = "bl.*"; static C_NEVER_INLINED_PATTERN: &'static str = "call.*c_never_inlined"; fn main() { + test_lto(false); + test_lto(true); +} + +fn test_lto(fat_lto: bool) { + let lto = if fat_lto { "fat" } else { "thin" }; + let clang_lto = if fat_lto { "full" } else { "thin" }; + rustc() + .lto(lto) .linker_plugin_lto("on") .output(static_lib_name("rustlib-xlto")) .opt_level("2") @@ -36,7 +45,7 @@ fn main() { .input("rustlib.rs") .run(); clang() - .lto("thin") + .lto(clang_lto) .use_ld("lld") .arg("-lrustlib-xlto") .out_exe("cmain") @@ -57,9 +66,10 @@ fn main() { .input("cmain") .run() .assert_stdout_contains_regex(RUST_NEVER_INLINED_PATTERN); - clang().input("clib.c").lto("thin").arg("-c").out_exe("clib.o").arg("-O2").run(); + clang().input("clib.c").lto(clang_lto).arg("-c").out_exe("clib.o").arg("-O2").run(); llvm_ar().obj_to_ar().output_input(static_lib_name("xyz"), "clib.o").run(); rustc() + .lto(lto) .linker_plugin_lto("on") .opt_level("2") .linker(&env_var("CLANG")) @@ -72,9 +82,12 @@ fn main() { .input("rsmain") .run() .assert_stdout_not_contains_regex(C_ALWAYS_INLINED_PATTERN); - llvm_objdump() - .disassemble() - .input("rsmain") - .run() - .assert_stdout_contains_regex(C_NEVER_INLINED_PATTERN); + + let dump = llvm_objdump().disassemble().input("rsmain").run(); + if !fat_lto { + dump.assert_stdout_contains_regex(C_NEVER_INLINED_PATTERN); + } else { + // fat lto inlines this anyway + dump.assert_stdout_not_contains_regex(C_NEVER_INLINED_PATTERN); + } } diff --git a/tests/run-make/fat-then-thin-lto/lib.rs b/tests/run-make/fat-then-thin-lto/lib.rs new file mode 100644 index 0000000000000..3091988368628 --- /dev/null +++ 
b/tests/run-make/fat-then-thin-lto/lib.rs
@@ -0,0 +1,8 @@
+#![feature(no_core, lang_items)]
+#![no_core]
+#![crate_type = "rlib"]
+
+#[lang = "sized"]
+trait Sized {}
+
+pub fn foo() {}
diff --git a/tests/run-make/fat-then-thin-lto/main.rs b/tests/run-make/fat-then-thin-lto/main.rs
new file mode 100644
index 0000000000000..a3f2e18158bc0
--- /dev/null
+++ b/tests/run-make/fat-then-thin-lto/main.rs
@@ -0,0 +1,11 @@
+#![allow(internal_features)]
+#![feature(no_core, lang_items)]
+#![no_core]
+#![crate_type = "cdylib"]
+
+extern crate lib;
+
+#[unsafe(no_mangle)]
+pub fn bar() {
+    lib::foo();
+}
diff --git a/tests/run-make/fat-then-thin-lto/rmake.rs b/tests/run-make/fat-then-thin-lto/rmake.rs
new file mode 100644
index 0000000000000..ef4f26689d4e8
--- /dev/null
+++ b/tests/run-make/fat-then-thin-lto/rmake.rs
@@ -0,0 +1,25 @@
+// Compile a library with lto=fat, then compile a binary with lto=thin
+// and check that lto is applied with the library.
+// The goal is to mimic the standard library being built with lto=fat
+// while still allowing users to build with lto=thin.
+
+//@ only-x86_64-unknown-linux-gnu
+
+use run_make_support::{dynamic_lib_name, llvm_objdump, rustc};
+
+fn main() {
+    rustc().input("lib.rs").opt_level("3").lto("fat").run();
+    rustc().input("main.rs").panic("abort").opt_level("3").lto("thin").run();
+
+    llvm_objdump()
+        .input(dynamic_lib_name("main"))
+        .arg("--disassemble-symbols=bar")
+        .run()
+        // The called function should be inlined.
+        // Check that we have a ret (to detect tail
+        // calls with a jmp) and no call.
+        .assert_stdout_contains("bar")
+        .assert_stdout_contains("ret")
+        .assert_stdout_not_contains("foo")
+        .assert_stdout_not_contains("call");
+}
diff --git a/tests/run-make/linker-plugin-lto-fat/ir.ll b/tests/run-make/linker-plugin-lto-fat/ir.ll
new file mode 100644
index 0000000000000..fa3dbdd4e088d
--- /dev/null
+++ b/tests/run-make/linker-plugin-lto-fat/ir.ll
@@ -0,0 +1,6 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @ir_callee() {
+  ret void
+}
diff --git a/tests/run-make/linker-plugin-lto-fat/main.rs b/tests/run-make/linker-plugin-lto-fat/main.rs
new file mode 100644
index 0000000000000..3a7b02f4b92e3
--- /dev/null
+++ b/tests/run-make/linker-plugin-lto-fat/main.rs
@@ -0,0 +1,17 @@
+#![feature(no_core, lang_items)]
+#![no_core]
+#![crate_type = "cdylib"]
+
+#[lang = "sized"]
+trait Sized {}
+
+extern "C" {
+    fn ir_callee();
+}
+
+#[no_mangle]
+extern "C" fn rs_foo() {
+    unsafe {
+        ir_callee();
+    }
+}
diff --git a/tests/run-make/linker-plugin-lto-fat/rmake.rs b/tests/run-make/linker-plugin-lto-fat/rmake.rs
new file mode 100644
index 0000000000000..0cfc799d2aaa1
--- /dev/null
+++ b/tests/run-make/linker-plugin-lto-fat/rmake.rs
@@ -0,0 +1,32 @@
+// Check that -C lto=fat with -C linker-plugin-lto actually works and can inline functions.
+// A library is created from LLVM IR, defining a single function. Then a dylib is compiled,
+// linking to the library and calling the function from the library.
+// The function from the library should end up inlined and disappear from the output.
+
+//@ only-x86_64-unknown-linux-gnu
+//@ needs-rust-lld
+
+use run_make_support::{dynamic_lib_name, llvm_as, llvm_objdump, rustc};
+
+fn main() {
+    llvm_as().input("ir.ll").run();
+    rustc()
+        .input("main.rs")
+        .opt_level("3")
+        .lto("fat")
+        .linker_plugin_lto("on")
+        .link_arg("ir.bc")
+        .arg("-Zlinker-features=+lld")
+        .run();
+
+    llvm_objdump()
+        .input(dynamic_lib_name("main"))
+        .arg("--disassemble-symbols=rs_foo")
+        .run()
+        // The called function should be inlined.
+        // Check that we have a ret (to detect tail
+        // calls with a jmp) and no call.
+        .assert_stdout_contains("foo")
+        .assert_stdout_contains("ret")
+        .assert_stdout_not_contains("call");
+}